abydos.distance._seqalign.needleman_wunsch() - Code Metrics - Inspection of "0.3.6" - chrislit/abydos - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Pull Request — master (#141)

by Chris

created 2018-11-08 03:44 UTC

abydos.distance._seqalign.needleman_wunsch() A

↳ Parent: abydos.distance._seqalign

Complexity

Conditions

Size

Total Lines	27
Code Lines	2

Duplication

Lines	0
Ratio	0 %

Code Coverage

Tests	2
CRAP Score	1

Importance

Changes

Metric	Value
eloc	2
dl	0
loc	27
ccs	2
cts	2
cp	1
rs	10
c	0
b	0
f	0
cc	1
nop	4
crap	1

# -*- coding: utf-8 -*-

# Copyright 2014-2018 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.distance.seqalign.

The distance.seqalign module implements string edit distance functions
used in sequence alignment:

    - Matrix similarity
    - Needleman-Wunsch score
    - Smith-Waterman score
    - Gotoh score
"""

from __future__ import unicode_literals

from numpy import float32 as np_float32
from numpy import zeros as np_zeros

from six.moves import range

from ._basic import sim_ident
from ._distance import Distance

__all__ = [
    'Gotoh',
    'NeedlemanWunsch',
    'SmithWaterman',
    'gotoh',
    'needleman_wunsch',
    'smith_waterman',
]


class NeedlemanWunsch(Distance):

    """Needleman-Wunsch score.

    The Needleman-Wunsch score :cite:`Needleman:1970` is a standard edit
    distance measure.
    """

    @staticmethod
    def sim_matrix(

        src,

        tar,

        mat=None,

        mismatch_cost=0,

        match_cost=1,

        symmetric=True,

        alphabet=None,

    ):
        """Return the matrix similarity of two strings.

        With the default parameters, this is identical to sim_ident.
        It is possible for sim_matrix to return values outside of the range
        :math:`[0, 1]`, if values outside that range are present in mat,
        mismatch_cost, or match_cost.

        Args:
            src (str): Source string for comparison
            tar (str): Target string for comparison
            mat (dict): A dict mapping tuples to costs; the tuples are (src,
                tar) pairs of symbols from the alphabet parameter
            mismatch_cost (float): the value returned if (src, tar) is absent
                from mat when src does not equal tar
            match_cost (float): the value returned if (src, tar) is absent from
                mat when src equals tar
            symmetric (bool): True if the cost of src not matching tar is
                identical to the cost of tar not matching src; in this case,
                the values in mat need only contain (src, tar) or (tar, src),
                not both
            alphabet (str): a collection of tokens from which src and tar are
                drawn; if this is defined a ValueError is raised if either tar
                or src is not found in alphabet

        Returns:
            float: Matrix similarity

        Raises:
            ValueError: src value not in alphabet
            ValueError: tar value not in alphabet

        Examples:
            >>> NeedlemanWunsch.sim_matrix('cat', 'hat')
            0
            >>> NeedlemanWunsch.sim_matrix('hat', 'hat')
            1

        """
        if alphabet:
            alphabet = tuple(alphabet)
            for i in src:
                if i not in alphabet:
                    raise ValueError('src value not in alphabet')
            for i in tar:
                if i not in alphabet:
                    raise ValueError('tar value not in alphabet')

        if src == tar:
            if mat and (src, src) in mat:
                return mat[(src, src)]
            return match_cost
        if mat and (src, tar) in mat:
            return mat[(src, tar)]
        elif symmetric and mat and (tar, src) in mat:
            return mat[(tar, src)]
        return mismatch_cost

    def dist_abs(self, src, tar, gap_cost=1, sim_func=sim_ident):

        """Return the Needleman-Wunsch score of two strings.

        Args:
            src (str): Source string for comparison
            tar (str): Target string for comparison
            gap_cost (float): the cost of an alignment gap (1 by default)
            sim_func (function): a function that returns the similarity of two
                characters (identity similarity by default)

        Returns:
            float: Needleman-Wunsch score

        Examples:
            >>> cmp = NeedlemanWunsch()
            >>> cmp.dist_abs('cat', 'hat')
            2.0
            >>> cmp.dist_abs('Niall', 'Neil')
            1.0
            >>> cmp.dist_abs('aluminum', 'Catalan')
            -1.0
            >>> cmp.dist_abs('ATCG', 'TAGC')
            0.0

        """
        d_mat = np_zeros((len(src) + 1, len(tar) + 1), dtype=np_float32)

        for i in range(len(src) + 1):
            d_mat[i, 0] = -(i * gap_cost)
        for j in range(len(tar) + 1):
            d_mat[0, j] = -(j * gap_cost)
        for i in range(1, len(src) + 1):
            for j in range(1, len(tar) + 1):
                match = d_mat[i - 1, j - 1] + sim_func(src[i - 1], tar[j - 1])
                delete = d_mat[i - 1, j] - gap_cost
                insert = d_mat[i, j - 1] - gap_cost
                d_mat[i, j] = max(match, delete, insert)
        return d_mat[d_mat.shape[0] - 1, d_mat.shape[1] - 1]


def needleman_wunsch(src, tar, gap_cost=1, sim_func=sim_ident):
    """Return the Needleman-Wunsch score of two strings.

    This is a wrapper for :py:meth:`NeedlemanWunsch.dist_abs`.

    Args:
        src (str): Source string for comparison
        tar (str): Target string for comparison
        gap_cost (float): the cost of an alignment gap (1 by default)
        sim_func (function): a function that returns the similarity of two
            characters (identity similarity by default)

    Returns:
        float: Needleman-Wunsch score

    Examples:
        >>> needleman_wunsch('cat', 'hat')
        2.0
        >>> needleman_wunsch('Niall', 'Neil')
        1.0
        >>> needleman_wunsch('aluminum', 'Catalan')
        -1.0
        >>> needleman_wunsch('ATCG', 'TAGC')
        0.0

    """
    return NeedlemanWunsch().dist_abs(src, tar, gap_cost, sim_func)


class SmithWaterman(NeedlemanWunsch):

    """Smith-Waterman score.

    The Smith-Waterman score :cite:`Smith:1981` is a standard edit distance
    measure, differing from Needleman-Wunsch in that it focuses on local
    alignment and disallows negative scores.
    """

    def dist_abs(self, src, tar, gap_cost=1, sim_func=sim_ident):

        """Return the Smith-Waterman score of two strings.

        Args:
            src (str): Source string for comparison
            tar (str): Target string for comparison
            gap_cost (float): the cost of an alignment gap (1 by default)
            sim_func (function): a function that returns the similarity of two
                characters (identity similarity by default)

        Returns:
            float: Smith-Waterman score

        Examples:
            >>> cmp = SmithWaterman()
            >>> cmp.dist_abs('cat', 'hat')
            2.0
            >>> cmp.dist_abs('Niall', 'Neil')
            1.0
            >>> cmp.dist_abs('aluminum', 'Catalan')
            0.0
            >>> cmp.dist_abs('ATCG', 'TAGC')
            1.0

        """
        d_mat = np_zeros((len(src) + 1, len(tar) + 1), dtype=np_float32)

        for i in range(len(src) + 1):
            d_mat[i, 0] = 0
        for j in range(len(tar) + 1):
            d_mat[0, j] = 0
        for i in range(1, len(src) + 1):
            for j in range(1, len(tar) + 1):
                match = d_mat[i - 1, j - 1] + sim_func(src[i - 1], tar[j - 1])
                delete = d_mat[i - 1, j] - gap_cost
                insert = d_mat[i, j - 1] - gap_cost
                d_mat[i, j] = max(0, match, delete, insert)
        return d_mat[d_mat.shape[0] - 1, d_mat.shape[1] - 1]


def smith_waterman(src, tar, gap_cost=1, sim_func=sim_ident):
    """Return the Smith-Waterman score of two strings.

    This is a wrapper for :py:meth:`SmithWaterman.dist_abs`.

    Args:
        src (str): Source string for comparison
        tar (str): Target string for comparison
        gap_cost (float): the cost of an alignment gap (1 by default)
        sim_func (function): a function that returns the similarity of two
            characters (identity similarity by default)

    Returns:
        float: Smith-Waterman score

    Examples:
        >>> smith_waterman('cat', 'hat')
        2.0
        >>> smith_waterman('Niall', 'Neil')
        1.0
        >>> smith_waterman('aluminum', 'Catalan')
        0.0
        >>> smith_waterman('ATCG', 'TAGC')
        1.0

    """
    return SmithWaterman().dist_abs(src, tar, gap_cost, sim_func)


class Gotoh(NeedlemanWunsch):

    """Gotoh score.

    The Gotoh score :cite:`Gotoh:1982` is essentially Needleman-Wunsch with
    affine gap penalties.
    """

    def dist_abs(self, src, tar, gap_open=1, gap_ext=0.4, sim_func=sim_ident):

        """Return the Gotoh score of two strings.

        Args:
            src (str): Source string for comparison
            tar (str): Target string for comparison
            gap_open (float): the cost of an open alignment gap (1 by default)
            gap_ext (float): the cost of an alignment gap extension (0.4 by
                default)
            sim_func (function): a function that returns the similarity of two
                characters (identity similarity by default)

        Returns:
            float: Gotoh score

        Examples:
            >>> cmp = Gotoh()
            >>> cmp.dist_abs('cat', 'hat')
            2.0
            >>> cmp.dist_abs('Niall', 'Neil')
            1.0
            >>> round(cmp.dist_abs('aluminum', 'Catalan'), 12)
            -0.4
            >>> cmp.dist_abs('cat', 'hat')
            2.0

        """
        d_mat = np_zeros((len(src) + 1, len(tar) + 1), dtype=np_float32)
        p_mat = np_zeros((len(src) + 1, len(tar) + 1), dtype=np_float32)
        q_mat = np_zeros((len(src) + 1, len(tar) + 1), dtype=np_float32)

        d_mat[0, 0] = 0
        p_mat[0, 0] = float('-inf')
        q_mat[0, 0] = float('-inf')
        for i in range(1, len(src) + 1):
            d_mat[i, 0] = float('-inf')
            p_mat[i, 0] = -gap_open - gap_ext * (i - 1)
            q_mat[i, 0] = float('-inf')
            q_mat[i, 1] = -gap_open
        for j in range(1, len(tar) + 1):
            d_mat[0, j] = float('-inf')
            p_mat[0, j] = float('-inf')
            p_mat[1, j] = -gap_open
            q_mat[0, j] = -gap_open - gap_ext * (j - 1)

        for i in range(1, len(src) + 1):
            for j in range(1, len(tar) + 1):
                sim_val = sim_func(src[i - 1], tar[j - 1])
                d_mat[i, j] = max(
                    d_mat[i - 1, j - 1] + sim_val,
                    p_mat[i - 1, j - 1] + sim_val,
                    q_mat[i - 1, j - 1] + sim_val,
                )

                p_mat[i, j] = max(
                    d_mat[i - 1, j] - gap_open, p_mat[i - 1, j] - gap_ext
                )

                q_mat[i, j] = max(
                    d_mat[i, j - 1] - gap_open, q_mat[i, j - 1] - gap_ext
                )

        i, j = (n - 1 for n in d_mat.shape)

        return max(d_mat[i, j], p_mat[i, j], q_mat[i, j])


def gotoh(src, tar, gap_open=1, gap_ext=0.4, sim_func=sim_ident):
    """Return the Gotoh score of two strings.

    This is a wrapper for :py:meth:`Gotoh.dist_abs`.

    Args:
        src (str): Source string for comparison
        tar (str): Target string for comparison
        gap_open (float): the cost of an open alignment gap (1 by default)
        gap_ext (float): the cost of an alignment gap extension (0.4 by
            default)
        sim_func (function): a function that returns the similarity of two
            characters (identity similarity by default)

    Returns:
        float: Gotoh score

    Examples:
        >>> gotoh('cat', 'hat')
        2.0
        >>> gotoh('Niall', 'Neil')
        1.0
        >>> round(gotoh('aluminum', 'Catalan'), 12)
        -0.4
        >>> gotoh('cat', 'hat')
        2.0

    """
    return Gotoh().dist_abs(src, tar, gap_open, gap_ext, sim_func)


if __name__ == '__main__':
    import doctest

    doctest.testmod()


1			# -- coding: utf-8 --
2
3			# Copyright 2014-2018 by Christopher C. Little.
4			# This file is part of Abydos.
5			#
6			# Abydos is free software: you can redistribute it and/or modify
7			# it under the terms of the GNU General Public License as published by
8			# the Free Software Foundation, either version 3 of the License, or
9			# (at your option) any later version.
10			#
11			# Abydos is distributed in the hope that it will be useful,
12			# but WITHOUT ANY WARRANTY; without even the implied warranty of
13			# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14			# GNU General Public License for more details.
15			#
16			# You should have received a copy of the GNU General Public License
17			# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19	1		"""abydos.distance.seqalign.
20
21			The distance.seqalign module implements string edit distance functions
22			used in sequence alignment:
23
24			- Matrix similarity
25			- Needleman-Wunsch score
26			- Smith-Waterman score
27			- Gotoh score
28			"""
29
30	1		from __future__ import unicode_literals
31
32	1		from numpy import float32 as np_float32
33	1		from numpy import zeros as np_zeros
34
35	1		from six.moves import range
36
37	1		from ._basic import sim_ident
38	1		from ._distance import Distance
39
40	1		__all__ = [
41			'Gotoh',
42			'NeedlemanWunsch',
43			'SmithWaterman',
44			'gotoh',
45			'needleman_wunsch',
46			'smith_waterman',
47			]
48
49
50	1		class NeedlemanWunsch(Distance):
			0 ignored issues – show Unused Code introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report The variable `__class__` seems to be unused. Loading history...
51			"""Needleman-Wunsch score.
52
53			The Needleman-Wunsch score :cite:`Needleman:1970` is a standard edit
54			distance measure.
55			"""
56
57	1		@staticmethod
58	1		def sim_matrix(
			0 ignored issues – show best-practice introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Too many arguments (7/5) Loading history...
59			src,
			0 ignored issues – show Coding Style introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
60			tar,
			0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
61			mat=None,
			0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
62			mismatch_cost=0,
			0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
63			match_cost=1,
			0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
64			symmetric=True,
			0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
65			alphabet=None,
			0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
66			):
67			"""Return the matrix similarity of two strings.
68
69			With the default parameters, this is identical to sim_ident.
70			It is possible for sim_matrix to return values outside of the range
71			:math:`[0, 1]`, if values outside that range are present in mat,
72			mismatch_cost, or match_cost.
73
74			Args:
75			src (str): Source string for comparison
76			tar (str): Target string for comparison
77			mat (dict): A dict mapping tuples to costs; the tuples are (src,
78			tar) pairs of symbols from the alphabet parameter
79			mismatch_cost (float): the value returned if (src, tar) is absent
80			from mat when src does not equal tar
81			match_cost (float): the value returned if (src, tar) is absent from
82			mat when src equals tar
83			symmetric (bool): True if the cost of src not matching tar is
84			identical to the cost of tar not matching src; in this case,
85			the values in mat need only contain (src, tar) or (tar, src),
86			not both
87			alphabet (str): a collection of tokens from which src and tar are
88			drawn; if this is defined a ValueError is raised if either tar
89			or src is not found in alphabet
90
91			Returns:
92			float: Matrix similarity
93
94			Raises:
95			ValueError: src value not in alphabet
96			ValueError: tar value not in alphabet
97
98			Examples:
99			>>> NeedlemanWunsch.sim_matrix('cat', 'hat')
100			0
101			>>> NeedlemanWunsch.sim_matrix('hat', 'hat')
102			1
103
104			"""
105	1		if alphabet:
106	1		alphabet = tuple(alphabet)
107	1		for i in src:
108	1		if i not in alphabet:
109	1		raise ValueError('src value not in alphabet')
110	1		for i in tar:
111	1		if i not in alphabet:
112	1		raise ValueError('tar value not in alphabet')
113
114	1		if src == tar:
115	1		if mat and (src, src) in mat:
116	1		return mat[(src, src)]
117	1		return match_cost
118	1		if mat and (src, tar) in mat:
119	1		return mat[(src, tar)]
120	1		elif symmetric and mat and (tar, src) in mat:
121	1		return mat[(tar, src)]
122	1		return mismatch_cost
123
124	1	View Code Duplication	def dist_abs(self, src, tar, gap_cost=1, sim_func=sim_ident):
			0 ignored issues – show Bug introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Parameters differ from overridden 'dist_abs' method Loading history... Duplication introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
125			"""Return the Needleman-Wunsch score of two strings.
126
127			Args:
128			src (str): Source string for comparison
129			tar (str): Target string for comparison
130			gap_cost (float): the cost of an alignment gap (1 by default)
131			sim_func (function): a function that returns the similarity of two
132			characters (identity similarity by default)
133
134			Returns:
135			float: Needleman-Wunsch score
136
137			Examples:
138			>>> cmp = NeedlemanWunsch()
139			>>> cmp.dist_abs('cat', 'hat')
140			2.0
141			>>> cmp.dist_abs('Niall', 'Neil')
142			1.0
143			>>> cmp.dist_abs('aluminum', 'Catalan')
144			-1.0
145			>>> cmp.dist_abs('ATCG', 'TAGC')
146			0.0
147
148			"""
149	1		d_mat = np_zeros((len(src) + 1, len(tar) + 1), dtype=np_float32)
150
151	1		for i in range(len(src) + 1):
152	1		d_mat[i, 0] = -(i * gap_cost)
153	1		for j in range(len(tar) + 1):
154	1		d_mat[0, j] = -(j * gap_cost)
155	1		for i in range(1, len(src) + 1):
156	1		for j in range(1, len(tar) + 1):
157	1		match = d_mat[i - 1, j - 1] + sim_func(src[i - 1], tar[j - 1])
158	1		delete = d_mat[i - 1, j] - gap_cost
159	1		insert = d_mat[i, j - 1] - gap_cost
160	1		d_mat[i, j] = max(match, delete, insert)
161	1		return d_mat[d_mat.shape[0] - 1, d_mat.shape[1] - 1]
162
163
164	1		def needleman_wunsch(src, tar, gap_cost=1, sim_func=sim_ident):
165			"""Return the Needleman-Wunsch score of two strings.
166
167			This is a wrapper for :py:meth:`NeedlemanWunsch.dist_abs`.
168
169			Args:
170			src (str): Source string for comparison
171			tar (str): Target string for comparison
172			gap_cost (float): the cost of an alignment gap (1 by default)
173			sim_func (function): a function that returns the similarity of two
174			characters (identity similarity by default)
175
176			Returns:
177			float: Needleman-Wunsch score
178
179			Examples:
180			>>> needleman_wunsch('cat', 'hat')
181			2.0
182			>>> needleman_wunsch('Niall', 'Neil')
183			1.0
184			>>> needleman_wunsch('aluminum', 'Catalan')
185			-1.0
186			>>> needleman_wunsch('ATCG', 'TAGC')
187			0.0
188
189			"""
190	1		return NeedlemanWunsch().dist_abs(src, tar, gap_cost, sim_func)
191
192
193	1		class SmithWaterman(NeedlemanWunsch):
			0 ignored issues – show Unused Code introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report The variable `__class__` seems to be unused. Loading history...
194			"""Smith-Waterman score.
195
196			The Smith-Waterman score :cite:`Smith:1981` is a standard edit distance
197			measure, differing from Needleman-Wunsch in that it focuses on local
198			alignment and disallows negative scores.
199			"""
200
201	1	View Code Duplication	def dist_abs(self, src, tar, gap_cost=1, sim_func=sim_ident):
			0 ignored issues – show Duplication introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
202			"""Return the Smith-Waterman score of two strings.
203
204			Args:
205			src (str): Source string for comparison
206			tar (str): Target string for comparison
207			gap_cost (float): the cost of an alignment gap (1 by default)
208			sim_func (function): a function that returns the similarity of two
209			characters (identity similarity by default)
210
211			Returns:
212			float: Smith-Waterman score
213
214			Examples:
215			>>> cmp = SmithWaterman()
216			>>> cmp.dist_abs('cat', 'hat')
217			2.0
218			>>> cmp.dist_abs('Niall', 'Neil')
219			1.0
220			>>> cmp.dist_abs('aluminum', 'Catalan')
221			0.0
222			>>> cmp.dist_abs('ATCG', 'TAGC')
223			1.0
224
225			"""
226	1		d_mat = np_zeros((len(src) + 1, len(tar) + 1), dtype=np_float32)
227
228	1		for i in range(len(src) + 1):
229	1		d_mat[i, 0] = 0
230	1		for j in range(len(tar) + 1):
231	1		d_mat[0, j] = 0
232	1		for i in range(1, len(src) + 1):
233	1		for j in range(1, len(tar) + 1):
234	1		match = d_mat[i - 1, j - 1] + sim_func(src[i - 1], tar[j - 1])
235	1		delete = d_mat[i - 1, j] - gap_cost
236	1		insert = d_mat[i, j - 1] - gap_cost
237	1		d_mat[i, j] = max(0, match, delete, insert)
238	1		return d_mat[d_mat.shape[0] - 1, d_mat.shape[1] - 1]
239
240
241	1		def smith_waterman(src, tar, gap_cost=1, sim_func=sim_ident):
242			"""Return the Smith-Waterman score of two strings.
243
244			This is a wrapper for :py:meth:`SmithWaterman.dist_abs`.
245
246			Args:
247			src (str): Source string for comparison
248			tar (str): Target string for comparison
249			gap_cost (float): the cost of an alignment gap (1 by default)
250			sim_func (function): a function that returns the similarity of two
251			characters (identity similarity by default)
252
253			Returns:
254			float: Smith-Waterman score
255
256			Examples:
257			>>> smith_waterman('cat', 'hat')
258			2.0
259			>>> smith_waterman('Niall', 'Neil')
260			1.0
261			>>> smith_waterman('aluminum', 'Catalan')
262			0.0
263			>>> smith_waterman('ATCG', 'TAGC')
264			1.0
265
266			"""
267	1		return SmithWaterman().dist_abs(src, tar, gap_cost, sim_func)
268
269
270	1		class Gotoh(NeedlemanWunsch):
			0 ignored issues – show Unused Code introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report The variable `__class__` seems to be unused. Loading history...
271			"""Gotoh score.
272
273			The Gotoh score :cite:`Gotoh:1982` is essentially Needleman-Wunsch with
274			affine gap penalties.
275			"""
276
277	1		def dist_abs(self, src, tar, gap_open=1, gap_ext=0.4, sim_func=sim_ident):
			0 ignored issues – show best-practice introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Too many arguments (6/5) Loading history... Bug introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Parameters differ from overridden 'dist_abs' method Loading history...
278			"""Return the Gotoh score of two strings.
279
280			Args:
281			src (str): Source string for comparison
282			tar (str): Target string for comparison
283			gap_open (float): the cost of an open alignment gap (1 by default)
284			gap_ext (float): the cost of an alignment gap extension (0.4 by
285			default)
286			sim_func (function): a function that returns the similarity of two
287			characters (identity similarity by default)
288
289			Returns:
290			float: Gotoh score
291
292			Examples:
293			>>> cmp = Gotoh()
294			>>> cmp.dist_abs('cat', 'hat')
295			2.0
296			>>> cmp.dist_abs('Niall', 'Neil')
297			1.0
298			>>> round(cmp.dist_abs('aluminum', 'Catalan'), 12)
299			-0.4
300			>>> cmp.dist_abs('cat', 'hat')
301			2.0
302
303			"""
304	1		d_mat = np_zeros((len(src) + 1, len(tar) + 1), dtype=np_float32)
305	1		p_mat = np_zeros((len(src) + 1, len(tar) + 1), dtype=np_float32)
306	1		q_mat = np_zeros((len(src) + 1, len(tar) + 1), dtype=np_float32)
307
308	1		d_mat[0, 0] = 0
309	1		p_mat[0, 0] = float('-inf')
310	1		q_mat[0, 0] = float('-inf')
311	1		for i in range(1, len(src) + 1):
312	1		d_mat[i, 0] = float('-inf')
313	1		p_mat[i, 0] = -gap_open - gap_ext * (i - 1)
314	1		q_mat[i, 0] = float('-inf')
315	1		q_mat[i, 1] = -gap_open
316	1		for j in range(1, len(tar) + 1):
317	1		d_mat[0, j] = float('-inf')
318	1		p_mat[0, j] = float('-inf')
319	1		p_mat[1, j] = -gap_open
320	1		q_mat[0, j] = -gap_open - gap_ext * (j - 1)
321
322	1		for i in range(1, len(src) + 1):
323	1		for j in range(1, len(tar) + 1):
324	1		sim_val = sim_func(src[i - 1], tar[j - 1])
325	1		d_mat[i, j] = max(
326			d_mat[i - 1, j - 1] + sim_val,
327			p_mat[i - 1, j - 1] + sim_val,
328			q_mat[i - 1, j - 1] + sim_val,
329			)
330
331	1		p_mat[i, j] = max(
332			d_mat[i - 1, j] - gap_open, p_mat[i - 1, j] - gap_ext
333			)
334
335	1		q_mat[i, j] = max(
336			d_mat[i, j - 1] - gap_open, q_mat[i, j - 1] - gap_ext
337			)
338
339	1		i, j = (n - 1 for n in d_mat.shape)
			0 ignored issues – show Comprehensibility Best Practice introduced 2018-08-02 19:04 UTC by Report Bug Copy Issue Report The variable `n` does not seem to be defined. Loading history...
340	1		return max(d_mat[i, j], p_mat[i, j], q_mat[i, j])
341
342
343	1		def gotoh(src, tar, gap_open=1, gap_ext=0.4, sim_func=sim_ident):
344			"""Return the Gotoh score of two strings.
345
346			This is a wrapper for :py:meth:`Gotoh.dist_abs`.
347
348			Args:
349			src (str): Source string for comparison
350			tar (str): Target string for comparison
351			gap_open (float): the cost of an open alignment gap (1 by default)
352			gap_ext (float): the cost of an alignment gap extension (0.4 by
353			default)
354			sim_func (function): a function that returns the similarity of two
355			characters (identity similarity by default)
356
357			Returns:
358			float: Gotoh score
359
360			Examples:
361			>>> gotoh('cat', 'hat')
362			2.0
363			>>> gotoh('Niall', 'Neil')
364			1.0
365			>>> round(gotoh('aluminum', 'Catalan'), 12)
366			-0.4
367			>>> gotoh('cat', 'hat')
368			2.0
369
370			"""
371	1		return Gotoh().dist_abs(src, tar, gap_open, gap_ext, sim_func)
372
373
374			if __name__ == '__main__':
375			import doctest
376
377			doctest.testmod()
378

chrislit / abydos

Pull Request — master (#141)

abydos.distance._seqalign.needleman_wunsch() A

Complexity

Size

Duplication

Code Coverage

Importance

Duplication Side-by-Side

Filter issues like