abydos.distance._levenshtein - Code Metrics - chrislit/abydos - Measure and Improve Code Quality continuously with Scrutinizer

abydos.distance._levenshtein A
last analyzed 2020-12-31 20:10 UTC

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	405
Duplicated Lines	0 %

Test Coverage

Coverage

100%

Importance

Changes

Metric	Value
wmc	36
eloc	141
dl	0
loc	405
ccs	99
cts	99
cp	1
rs	9.52
c	0
b	0
f	0

6 Methods

Rating	Name	Size	Complexity
B	Levenshtein.alignment()	76	8
A	Levenshtein.__init__()	47	1
A	Levenshtein._taper()	5	2
A	Levenshtein.dist_abs()	64	5
F	Levenshtein._alignment_matrix()	78	17
A	Levenshtein.dist()	66	3

# Copyright 2014-2020 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.distance._levenshtein.

The distance._Levenshtein module implements string edit distance functions
based on Levenshtein distance, including:

    - Levenshtein distance
    - Optimal String Alignment distance
"""

from sys import float_info
from typing import Any, Callable, List, Tuple, Union, cast

import numpy as np

from ._distance import _Distance

__all__ = ['Levenshtein']


class Levenshtein(_Distance):
    """Levenshtein distance.

    This is the standard edit distance measure. Cf.
    :cite:`Levenshtein:1965,Levenshtein:1966`.

    Optimal string alignment (aka restricted
    Damerau-Levenshtein distance) :cite:`Boytsov:2011` is also supported.

    The ordinary Levenshtein & Optimal String Alignment distance both
    employ the Wagner-Fischer dynamic programming algorithm
    :cite:`Wagner:1974`.

    Levenshtein edit distance ordinarily has unit insertion, deletion, and
    substitution costs.

    .. versionadded:: 0.3.6
    .. versionchanged:: 0.4.0
        Added taper option
    """

    def __init__(
        self,
        mode: str = 'lev',
        cost: Tuple[float, float, float, float] = (1, 1, 1, 1),
        normalizer: Callable[[List[float]], float] = max,
        taper: bool = False,
        **kwargs: Any
    ) -> None:
        """Initialize Levenshtein instance.

        Parameters
        ----------
        mode : str
            Specifies a mode for computing the Levenshtein distance:

                - ``lev`` (default) computes the ordinary Levenshtein distance,
                  in which edits may include inserts, deletes, and
                  substitutions
                - ``osa`` computes the Optimal String Alignment distance, in
                  which edits may include inserts, deletes, substitutions, and
                  transpositions but substrings may only be edited once

        cost : tuple
            A 4-tuple representing the cost of the four possible edits:
            inserts, deletes, substitutions, and transpositions, respectively
            (by default: (1, 1, 1, 1))
        normalizer : function
            A function that takes an list and computes a normalization term
            by which the edit distance is divided (max by default). Another
            good option is the sum function.
        taper : bool
            Enables cost tapering. Following :cite:`Zobel:1996`, it causes
            edits at the start of the string to "just [exceed] twice the
            minimum penalty for replacement or deletion at the end of the
            string".
        **kwargs
            Arbitrary keyword arguments


        .. versionadded:: 0.4.0

        """
        super(Levenshtein, self).__init__(**kwargs)
        self._mode = mode
        self._cost = cost
        self._normalizer = normalizer
        self._taper_enabled = taper

    def _taper(self, pos: int, length: int) -> float:
        return (
            round(1 + ((length - pos) / length) * (1 + float_info.epsilon), 15)
            if self._taper_enabled
            else 1
        )

    def _alignment_matrix(
        self, src: str, tar: str, backtrace: bool = True
    ) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]:
        """Return the Levenshtein alignment matrix.

        Parameters
        ----------
        src : str
            Source string for comparison
        tar : str
            Target string for comparison
        backtrace : bool
            Return the backtrace matrix as well

        Returns
        -------
        numpy.ndarray or tuple(numpy.ndarray, numpy.ndarray)
            The alignment matrix and (optionally) the backtrace matrix


        .. versionadded:: 0.4.1

        """
        ins_cost, del_cost, sub_cost, trans_cost = self._cost

        src_len = len(src)
        tar_len = len(tar)
        max_len = max(src_len, tar_len)

        d_mat = np.zeros((src_len + 1, tar_len + 1), dtype=np.float_)
        if backtrace:
            trace_mat = np.zeros((src_len + 1, tar_len + 1), dtype=np.int8)
        for i in range(src_len + 1):
            d_mat[i, 0] = i * self._taper(i, max_len) * del_cost
            if backtrace:
                trace_mat[i, 0] = 1
        for j in range(tar_len + 1):
            d_mat[0, j] = j * self._taper(j, max_len) * ins_cost
            if backtrace:
                trace_mat[0, j] = 0

        for i in range(src_len):
            for j in range(tar_len):
                opts = (
                    d_mat[i + 1, j]
                    + ins_cost * self._taper(1 + max(i, j), max_len),  # ins
                    d_mat[i, j + 1]
                    + del_cost * self._taper(1 + max(i, j), max_len),  # del
                    d_mat[i, j]
                    + (
                        sub_cost * self._taper(1 + max(i, j), max_len)
                        if src[i] != tar[j]
                        else 0
                    ),  # sub/==
                )
                d_mat[i + 1, j + 1] = min(opts)
                if backtrace:
                    trace_mat[i + 1, j + 1] = int(np.argmin(opts))

                if self._mode == 'osa':
                    if (
                        i + 1 > 1
                        and j + 1 > 1
                        and src[i] == tar[j - 1]
                        and src[i - 1] == tar[j]
                    ):
                        # transposition
                        d_mat[i + 1, j + 1] = min(
                            d_mat[i + 1, j + 1],
                            d_mat[i - 1, j - 1]
                            + trans_cost * self._taper(1 + max(i, j), max_len),
                        )
                        if backtrace:
                            trace_mat[i + 1, j + 1] = 2

        if backtrace:
            return d_mat, trace_mat

        return d_mat

    def alignment(self, src: str, tar: str) -> Tuple[float, str, str]:
        """Return the Levenshtein alignment of two strings.

        Parameters
        ----------
        src : str
            Source string for comparison
        tar : str
            Target string for comparison

        Returns
        -------
        tuple
            A tuple containing the Levenshtein distance and the two strings,
            aligned.

        Examples
        --------
        >>> cmp = Levenshtein()
        >>> cmp.alignment('cat', 'hat')
        (1.0, 'cat', 'hat')
        >>> cmp.alignment('Niall', 'Neil')
        (3.0, 'N-iall', 'Nei-l-')
        >>> cmp.alignment('aluminum', 'Catalan')
        (7.0, '-aluminum', 'Catalan--')
        >>> cmp.alignment('ATCG', 'TAGC')
        (3.0, 'ATCG-', '-TAGC')

        >>> cmp = Levenshtein(mode='osa')
        >>> cmp.alignment('ATCG', 'TAGC')
        (2.0, 'ATCG', 'TAGC')
        >>> cmp.alignment('ACTG', 'TAGC')
        (4.0, 'ACT-G-', '--TAGC')


        .. versionadded:: 0.4.1

        """
        d_mat, trace_mat = self._alignment_matrix(src, tar, backtrace=True)

        src_aligned = []
        tar_aligned = []

        src_pos = len(src)
        tar_pos = len(tar)

        distance = d_mat[src_pos, tar_pos]

        while src_pos and tar_pos:

            src_trace, tar_trace = (
                (src_pos, tar_pos - 1),
                (src_pos - 1, tar_pos),
                (src_pos - 1, tar_pos - 1),
            )[trace_mat[src_pos, tar_pos]]

            if src_pos != src_trace and tar_pos != tar_trace:
                src_aligned.append(src[src_trace])
                tar_aligned.append(tar[tar_trace])
            elif src_pos != src_trace:
                src_aligned.append(src[src_trace])
                tar_aligned.append('-')
            else:
                src_aligned.append('-')
                tar_aligned.append(tar[tar_trace])
            src_pos, tar_pos = src_trace, tar_trace
        while tar_pos:
            tar_pos -= 1
            src_aligned.append('-')
            tar_aligned.append(tar[tar_pos])
        while src_pos:
            src_pos -= 1
            src_aligned.append(src[src_pos])
            tar_aligned.append('-')

        return distance, ''.join(src_aligned[::-1]), ''.join(tar_aligned[::-1])

    def dist_abs(self, src: str, tar: str) -> float:
        """Return the Levenshtein distance between two strings.

        Parameters
        ----------
        src : str
            Source string for comparison
        tar : str
            Target string for comparison

        Returns
        -------
        int (may return a float if cost has float values)
            The Levenshtein distance between src & tar

        Examples
        --------
        >>> cmp = Levenshtein()
        >>> cmp.dist_abs('cat', 'hat')
        1
        >>> cmp.dist_abs('Niall', 'Neil')
        3
        >>> cmp.dist_abs('aluminum', 'Catalan')
        7
        >>> cmp.dist_abs('ATCG', 'TAGC')
        3

        >>> cmp = Levenshtein(mode='osa')
        >>> cmp.dist_abs('ATCG', 'TAGC')
        2
        >>> cmp.dist_abs('ACTG', 'TAGC')
        4


        .. versionadded:: 0.1.0
        .. versionchanged:: 0.3.6
            Encapsulated in class

        """
        ins_cost, del_cost, sub_cost, trans_cost = self._cost

        src_len = len(src)
        tar_len = len(tar)
        max_len = max(src_len, tar_len)

        if src == tar:
            return 0
        if not src:
            return sum(
                ins_cost * self._taper(pos, max_len) for pos in range(tar_len)
            )
        if not tar:
            return sum(
                del_cost * self._taper(pos, max_len) for pos in range(src_len)
            )

        d_mat = cast(
            np.ndarray, self._alignment_matrix(src, tar, backtrace=False)
        )

        if int(d_mat[src_len, tar_len]) == d_mat[src_len, tar_len]:
            return int(d_mat[src_len, tar_len])
        else:
            return cast(float, d_mat[src_len, tar_len])

    def dist(self, src: str, tar: str) -> float:
        """Return the normalized Levenshtein distance between two strings.

        The Levenshtein distance is normalized by dividing the Levenshtein
        distance (calculated by either of the two supported methods) by the
        greater of the number of characters in src times the cost of a delete
        and the number of characters in tar times the cost of an insert.
        For the case in which all operations have :math:`cost = 1`, this is
        equivalent to the greater of the length of the two strings src & tar.

        Parameters
        ----------
        src : str
            Source string for comparison
        tar : str
            Target string for comparison

        Returns
        -------
        float
            The normalized Levenshtein distance between src & tar

        Examples
        --------
        >>> cmp = Levenshtein()
        >>> round(cmp.dist('cat', 'hat'), 12)
        0.333333333333
        >>> round(cmp.dist('Niall', 'Neil'), 12)
        0.6
        >>> cmp.dist('aluminum', 'Catalan')
        0.875
        >>> cmp.dist('ATCG', 'TAGC')
        0.75


        .. versionadded:: 0.1.0
        .. versionchanged:: 0.3.6
            Encapsulated in class

        """
        if src == tar:
            return 0.0
        ins_cost, del_cost = self._cost[:2]

        src_len = len(src)
        tar_len = len(tar)

        if self._taper_enabled:
            normalize_term = self._normalizer(
                [
                    sum(
                        self._taper(pos, src_len) * del_cost
                        for pos in range(src_len)
                    ),
                    sum(
                        self._taper(pos, tar_len) * ins_cost
                        for pos in range(tar_len)
                    ),
                ]
            )
        else:
            normalize_term = self._normalizer(
                [src_len * del_cost, tar_len * ins_cost]
            )

        return self.dist_abs(src, tar) / normalize_term


if __name__ == '__main__':
    import doctest

    doctest.testmod()


1		# Copyright 2014-2020 by Christopher C. Little.
2		# This file is part of Abydos.
3		#
4		# Abydos is free software: you can redistribute it and/or modify
5		# it under the terms of the GNU General Public License as published by
6		# the Free Software Foundation, either version 3 of the License, or
7		# (at your option) any later version.
8		#
9		# Abydos is distributed in the hope that it will be useful,
10		# but WITHOUT ANY WARRANTY; without even the implied warranty of
11		# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12		# GNU General Public License for more details.
13		#
14		# You should have received a copy of the GNU General Public License
15		# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
16
17		"""abydos.distance._levenshtein.
18
19	1	The distance._Levenshtein module implements string edit distance functions
20		based on Levenshtein distance, including:
21
22		- Levenshtein distance
23		- Optimal String Alignment distance
24		"""
25
26		from sys import float_info
27		from typing import Any, Callable, List, Tuple, Union, cast
28	1
29		import numpy as np
30
31		from ._distance import _Distance
32
33		__all__ = ['Levenshtein']
34
35	1
36		class Levenshtein(_Distance):
37	1	"""Levenshtein distance.
38
39	1	This is the standard edit distance measure. Cf.
40		:cite:`Levenshtein:1965,Levenshtein:1966`.
41	1
42		Optimal string alignment (aka restricted
43	1	Damerau-Levenshtein distance) :cite:`Boytsov:2011` is also supported.
44	1
45		The ordinary Levenshtein & Optimal String Alignment distance both
46	1	employ the Wagner-Fischer dynamic programming algorithm
47		:cite:`Wagner:1974`.
48
49	1	Levenshtein edit distance ordinarily has unit insertion, deletion, and
50		substitution costs.
51
52		.. versionadded:: 0.3.6
53		.. versionchanged:: 0.4.0
54		Added taper option
55		"""
56
57		def __init__(
58		self,
59		mode: str = 'lev',
60		cost: Tuple[float, float, float, float] = (1, 1, 1, 1),
61		normalizer: Callable[[List[float]], float] = max,
62		taper: bool = False,
63		**kwargs: Any
64		) -> None:
65		"""Initialize Levenshtein instance.
66
67		Parameters
68		----------
69		mode : str
70	1	Specifies a mode for computing the Levenshtein distance:
71
72		- ``lev`` (default) computes the ordinary Levenshtein distance,
73		in which edits may include inserts, deletes, and
74		substitutions
75		- ``osa`` computes the Optimal String Alignment distance, in
76		which edits may include inserts, deletes, substitutions, and
77		transpositions but substrings may only be edited once
78
79		cost : tuple
80		A 4-tuple representing the cost of the four possible edits:
81		inserts, deletes, substitutions, and transpositions, respectively
82		(by default: (1, 1, 1, 1))
83		normalizer : function
84		A function that takes an list and computes a normalization term
85		by which the edit distance is divided (max by default). Another
86		good option is the sum function.
87		taper : bool
88		Enables cost tapering. Following :cite:`Zobel:1996`, it causes
89		edits at the start of the string to "just [exceed] twice the
90		minimum penalty for replacement or deletion at the end of the
91		string".
92		**kwargs
93		Arbitrary keyword arguments
94
95
96		.. versionadded:: 0.4.0
97
98		"""
99		super(Levenshtein, self).__init__(**kwargs)
100		self._mode = mode
101		self._cost = cost
102		self._normalizer = normalizer
103		self._taper_enabled = taper
104
105		def _taper(self, pos: int, length: int) -> float:
106		return (
107		round(1 + ((length - pos) / length) * (1 + float_info.epsilon), 15)
108		if self._taper_enabled
109		else 1
110		)
111
112	1	def _alignment_matrix(
113	1	self, src: str, tar: str, backtrace: bool = True
114	1	) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]:
115	1	"""Return the Levenshtein alignment matrix.
116	1
117		Parameters
118	1	----------
119	1	src : str
120		Source string for comparison
121		tar : str
122		Target string for comparison
123		backtrace : bool
124		Return the backtrace matrix as well
125	1
126		Returns
127		-------
128		numpy.ndarray or tuple(numpy.ndarray, numpy.ndarray)
129		The alignment matrix and (optionally) the backtrace matrix
130
131
132		.. versionadded:: 0.4.1
133
134		"""
135		ins_cost, del_cost, sub_cost, trans_cost = self._cost
136
137		src_len = len(src)
138		tar_len = len(tar)
139		max_len = max(src_len, tar_len)
140
141		d_mat = np.zeros((src_len + 1, tar_len + 1), dtype=np.float_)
142		if backtrace:
143		trace_mat = np.zeros((src_len + 1, tar_len + 1), dtype=np.int8)
144		for i in range(src_len + 1):
145		d_mat[i, 0] = i * self._taper(i, max_len) * del_cost
146	1	if backtrace:
147		trace_mat[i, 0] = 1
148	1	for j in range(tar_len + 1):
149	1	d_mat[0, j] = j * self._taper(j, max_len) * ins_cost
150	1	if backtrace:
151		trace_mat[0, j] = 0
152	1
153	1	for i in range(src_len):
154	1	for j in range(tar_len):
155	1	opts = (
156	1	d_mat[i + 1, j]
157	1	+ ins_cost * self._taper(1 + max(i, j), max_len), # ins
158	1	d_mat[i, j + 1]
159	1	+ del_cost * self._taper(1 + max(i, j), max_len), # del
160	1	d_mat[i, j]
161	1	+ (
162	1	sub_cost * self._taper(1 + max(i, j), max_len)
163		if src[i] != tar[j]
164	1	else 0
165	1	), # sub/==
166	1	)
167		d_mat[i + 1, j + 1] = min(opts)
168		if backtrace:
169		trace_mat[i + 1, j + 1] = int(np.argmin(opts))
170
171		if self._mode == 'osa':
172		if (
173		i + 1 > 1
174		and j + 1 > 1
175		and src[i] == tar[j - 1]
176		and src[i - 1] == tar[j]
177		):
178	1	# transposition
179	1	d_mat[i + 1, j + 1] = min(
180	1	d_mat[i + 1, j + 1],
181		d_mat[i - 1, j - 1]
182	1	+ trans_cost * self._taper(1 + max(i, j), max_len),
183	1	)
184		if backtrace:
185		trace_mat[i + 1, j + 1] = 2
186
187		if backtrace:
188		return d_mat, trace_mat
		0 ignored issues – show introduced 2020-01-06 02:51 UTC by Report Bug Copy Issue Report The variable `trace_mat` does not seem to be defined in case `backtrace` on line `142` is `False`. Are you sure this can never be the case? Loading history...
189		return d_mat
190	1
191		def alignment(self, src: str, tar: str) -> Tuple[float, str, str]:
192		"""Return the Levenshtein alignment of two strings.
193
194		Parameters
195	1	----------
196	1	src : str
197		Source string for comparison
198	1	tar : str
199	1	Target string for comparison
200	1
201		Returns
202	1	-------
203		tuple
204		A tuple containing the Levenshtein distance and the two strings,
205		aligned.
206
207		Examples
208		--------
209		>>> cmp = Levenshtein()
210		>>> cmp.alignment('cat', 'hat')
211		(1.0, 'cat', 'hat')
212		>>> cmp.alignment('Niall', 'Neil')
213		(3.0, 'N-iall', 'Nei-l-')
214		>>> cmp.alignment('aluminum', 'Catalan')
215		(7.0, '-aluminum', 'Catalan--')
216		>>> cmp.alignment('ATCG', 'TAGC')
217		(3.0, 'ATCG-', '-TAGC')
218
219		>>> cmp = Levenshtein(mode='osa')
220		>>> cmp.alignment('ATCG', 'TAGC')
221		(2.0, 'ATCG', 'TAGC')
222		>>> cmp.alignment('ACTG', 'TAGC')
223		(4.0, 'ACT-G-', '--TAGC')
224
225
226		.. versionadded:: 0.4.1
227
228		"""
229		d_mat, trace_mat = self._alignment_matrix(src, tar, backtrace=True)
230
231		src_aligned = []
232		tar_aligned = []
233
234		src_pos = len(src)
235		tar_pos = len(tar)
236
237		distance = d_mat[src_pos, tar_pos]
238
239		while src_pos and tar_pos:
240	1
241		src_trace, tar_trace = (
242	1	(src_pos, tar_pos - 1),
243	1	(src_pos - 1, tar_pos),
244		(src_pos - 1, tar_pos - 1),
245	1	)[trace_mat[src_pos, tar_pos]]
246	1
247		if src_pos != src_trace and tar_pos != tar_trace:
248	1	src_aligned.append(src[src_trace])
249		tar_aligned.append(tar[tar_trace])
250	1	elif src_pos != src_trace:
251		src_aligned.append(src[src_trace])
252	1	tar_aligned.append('-')
253		else:
254		src_aligned.append('-')
255		tar_aligned.append(tar[tar_trace])
256		src_pos, tar_pos = src_trace, tar_trace
257		while tar_pos:
258	1	tar_pos -= 1
259	1	src_aligned.append('-')
260	1	tar_aligned.append(tar[tar_pos])
261	1	while src_pos:
262	1	src_pos -= 1
263	1	src_aligned.append(src[src_pos])
264		tar_aligned.append('-')
265	1
266	1	return distance, ''.join(src_aligned[::-1]), ''.join(tar_aligned[::-1])
267	1
268	1	def dist_abs(self, src: str, tar: str) -> float:
269	1	"""Return the Levenshtein distance between two strings.
270	1
271	1	Parameters
272	1	----------
273	1	src : str
274	1	Source string for comparison
275	1	tar : str
276		Target string for comparison
277	1
278		Returns
279	1	-------
280		int (may return a float if cost has float values)
281		The Levenshtein distance between src & tar
282
283		Examples
284		--------
285		>>> cmp = Levenshtein()
286		>>> cmp.dist_abs('cat', 'hat')
287		1
288		>>> cmp.dist_abs('Niall', 'Neil')
289		3
290		>>> cmp.dist_abs('aluminum', 'Catalan')
291		7
292		>>> cmp.dist_abs('ATCG', 'TAGC')
293		3
294
295		>>> cmp = Levenshtein(mode='osa')
296		>>> cmp.dist_abs('ATCG', 'TAGC')
297		2
298		>>> cmp.dist_abs('ACTG', 'TAGC')
299		4
300
301
302		.. versionadded:: 0.1.0
303		.. versionchanged:: 0.3.6
304		Encapsulated in class
305
306		"""
307		ins_cost, del_cost, sub_cost, trans_cost = self._cost
308
309		src_len = len(src)
310		tar_len = len(tar)
311		max_len = max(src_len, tar_len)
312
313		if src == tar:
314		return 0
315		if not src:
316		return sum(
317		ins_cost * self._taper(pos, max_len) for pos in range(tar_len)
318	1	)
319		if not tar:
320	1	return sum(
321	1	del_cost * self._taper(pos, max_len) for pos in range(src_len)
322	1	)
323
324	1	d_mat = cast(
325	1	np.ndarray, self._alignment_matrix(src, tar, backtrace=False)
326	1	)
327	1
328		if int(d_mat[src_len, tar_len]) == d_mat[src_len, tar_len]:
329		return int(d_mat[src_len, tar_len])
330	1	else:
331	1	return cast(float, d_mat[src_len, tar_len])
332
333		def dist(self, src: str, tar: str) -> float:
334		"""Return the normalized Levenshtein distance between two strings.
335	1
336		The Levenshtein distance is normalized by dividing the Levenshtein
337	1	distance (calculated by either of the two supported methods) by the
338	1	greater of the number of characters in src times the cost of a delete
339		and the number of characters in tar times the cost of an insert.
340	1	For the case in which all operations have :math:`cost = 1`, this is
341		equivalent to the greater of the length of the two strings src & tar.
342	1
343		Parameters
344		----------
345		src : str
346		Source string for comparison
347		tar : str
348		Target string for comparison
349
350		Returns
351		-------
352		float
353		The normalized Levenshtein distance between src & tar
354
355		Examples
356		--------
357		>>> cmp = Levenshtein()
358		>>> round(cmp.dist('cat', 'hat'), 12)
359		0.333333333333
360		>>> round(cmp.dist('Niall', 'Neil'), 12)
361		0.6
362		>>> cmp.dist('aluminum', 'Catalan')
363		0.875
364		>>> cmp.dist('ATCG', 'TAGC')
365		0.75
366
367
368		.. versionadded:: 0.1.0
369		.. versionchanged:: 0.3.6
370		Encapsulated in class
371
372		"""
373		if src == tar:
374		return 0.0
375		ins_cost, del_cost = self._cost[:2]
376
377		src_len = len(src)
378		tar_len = len(tar)
379
380		if self._taper_enabled:
381		normalize_term = self._normalizer(
382	1	[
383	1	sum(
384	1	self._taper(pos, src_len) * del_cost
385		for pos in range(src_len)
386	1	),
387	1	sum(
388		self._taper(pos, tar_len) * ins_cost
389	1	for pos in range(tar_len)
390	1	),
391		]
392		)
393		else:
394		normalize_term = self._normalizer(
395		[src_len * del_cost, tar_len * ins_cost]
396		)
397
398		return self.dist_abs(src, tar) / normalize_term
399
400
401		if __name__ == '__main__':
402		import doctest
403	1
404		doctest.testmod()
405

chrislit / abydos

abydos.distance._levenshtein A last analyzed 2020-12-31 20:10 UTC

Complexity

Size/Duplication

Test Coverage

Importance

6 Methods

Duplication Side-by-Side

Filter issues like

abydos.distance._levenshtein A
last analyzed 2020-12-31 20:10 UTC