abydos.distance._lcsseq - Code Metrics - Inspection of "0.3.6" - chrislit/abydos - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Pull Request — master (#141)

by Chris

created 2018-11-17 01:03 UTC

abydos.distance._lcsseq A

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	247
Duplicated Lines	0 %

Test Coverage

Coverage

100%

Importance

Changes

Metric	Value
eloc	46
dl	0
loc	247
ccs	37
cts	37
cp	1
rs	10
c	0
b	0
f	0
wmc	15

3 Functions

Rating	Name	Size	Complexity
A	lcsseq()	30	1
A	sim_lcsseq()	30	1
A	dist_lcsseq()	30	1

2 Methods

Rating	Name	Duplication	Size	Complexity
B	LCSseq.lcsseq()	0	60	8
A	LCSseq.sim()	0	38	4

# -*- coding: utf-8 -*-

# Copyright 2014-2018 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.distance._lcsseq.

Longest common subsequence
"""

from __future__ import (
    absolute_import,
    division,
    print_function,
    unicode_literals,
)

from numpy import int as np_int
from numpy import zeros as np_zeros

from ._distance import _Distance

__all__ = ['LCSseq', 'dist_lcsseq', 'lcsseq', 'sim_lcsseq']


class LCSseq(_Distance):

    """Longest common subsequence.

    Longest common subsequence (LCSseq) is the longest subsequence of
    characters that two strings have in common.
    """

    def lcsseq(self, src, tar):
class Foo:
    def some_method(self, x, y):
        return x + y;
        """Return the longest common subsequence of two strings.

        Based on the dynamic programming algorithm from
        http://rosettacode.org/wiki/Longest_common_subsequence
        :cite:`rosettacode:2018b`. This is licensed GFDL 1.2.

        Modifications include:
            conversion to a numpy array in place of a list of lists

        Parameters
        ----------
        src : str
            Source string for comparison
        tar : str
            Target string for comparison

        Returns
        -------
        str
            The longest common subsequence

        Examples
        --------
        >>> sseq = LCSseq()
        >>> sseq.lcsseq('cat', 'hat')
        'at'
        >>> sseq.lcsseq('Niall', 'Neil')
        'Nil'
        >>> sseq.lcsseq('aluminum', 'Catalan')
        'aln'
        >>> sseq.lcsseq('ATCG', 'TAGC')
        'AC'

        """
        lengths = np_zeros((len(src) + 1, len(tar) + 1), dtype=np_int)

        # row 0 and column 0 are initialized to 0 already
        for i, src_char in enumerate(src):
            for j, tar_char in enumerate(tar):
                if src_char == tar_char:
                    lengths[i + 1, j + 1] = lengths[i, j] + 1
                else:
                    lengths[i + 1, j + 1] = max(
                        lengths[i + 1, j], lengths[i, j + 1]
                    )

        # read the substring out from the matrix
        result = ''
        i, j = len(src), len(tar)
        while i != 0 and j != 0:
            if lengths[i, j] == lengths[i - 1, j]:
                i -= 1
            elif lengths[i, j] == lengths[i, j - 1]:
                j -= 1
            else:
                result = src[i - 1] + result
                i -= 1
                j -= 1
        return result

    def sim(self, src, tar):

        r"""Return the longest common subsequence similarity of two strings.

        Longest common subsequence similarity (:math:`sim_{LCSseq}`).

        This employs the LCSseq function to derive a similarity metric:
        :math:`sim_{LCSseq}(s,t) = \frac{|LCSseq(s,t)|}{max(|s|, |t|)}`

        Parameters
        ----------
        src : str
            Source string for comparison
        tar : str
            Target string for comparison

        Returns
        -------
        float
            LCSseq similarity

        Examples
        --------
        >>> sseq = LCSseq()
        >>> sseq.sim('cat', 'hat')
        0.6666666666666666
        >>> sseq.sim('Niall', 'Neil')
        0.6
        >>> sseq.sim('aluminum', 'Catalan')
        0.375
        >>> sseq.sim('ATCG', 'TAGC')
        0.5

        """
        if src == tar:
            return 1.0
        elif not src or not tar:
            return 0.0
        return len(self.lcsseq(src, tar)) / max(len(src), len(tar))


def lcsseq(src, tar):
    """Return the longest common subsequence of two strings.

    This is a wrapper for :py:meth:`LCSseq.lcsseq`.

    Parameters
    ----------
    src : str
        Source string for comparison
    tar : str
        Target string for comparison

    Returns
    -------
    str
        The longest common subsequence

    Examples
    --------
    >>> lcsseq('cat', 'hat')
    'at'
    >>> lcsseq('Niall', 'Neil')
    'Nil'
    >>> lcsseq('aluminum', 'Catalan')
    'aln'
    >>> lcsseq('ATCG', 'TAGC')
    'AC'

    """
    return LCSseq().lcsseq(src, tar)


def sim_lcsseq(src, tar):
    r"""Return the longest common subsequence similarity of two strings.

    This is a wrapper for :py:meth:`LCSseq.sim`.

    Parameters
    ----------
    src : str
        Source string for comparison
    tar : str
        Target string for comparison

    Returns
    -------
    float
        LCSseq similarity

    Examples
    --------
    >>> sim_lcsseq('cat', 'hat')
    0.6666666666666666
    >>> sim_lcsseq('Niall', 'Neil')
    0.6
    >>> sim_lcsseq('aluminum', 'Catalan')
    0.375
    >>> sim_lcsseq('ATCG', 'TAGC')
    0.5

    """
    return LCSseq().sim(src, tar)


def dist_lcsseq(src, tar):
    """Return the longest common subsequence distance between two strings.

    This is a wrapper for :py:meth:`LCSseq.dist`.

    Parameters
    ----------
    src : str
        Source string for comparison
    tar : str
        Target string for comparison

    Returns
    -------
    float
        LCSseq distance

    Examples
    --------
    >>> dist_lcsseq('cat', 'hat')
    0.33333333333333337
    >>> dist_lcsseq('Niall', 'Neil')
    0.4
    >>> dist_lcsseq('aluminum', 'Catalan')
    0.625
    >>> dist_lcsseq('ATCG', 'TAGC')
    0.5

    """
    return LCSseq().dist(src, tar)


if __name__ == '__main__':
    import doctest

    doctest.testmod()


1		# -- coding: utf-8 --
2
3		# Copyright 2014-2018 by Christopher C. Little.
4		# This file is part of Abydos.
5		#
6		# Abydos is free software: you can redistribute it and/or modify
7		# it under the terms of the GNU General Public License as published by
8		# the Free Software Foundation, either version 3 of the License, or
9		# (at your option) any later version.
10		#
11		# Abydos is distributed in the hope that it will be useful,
12		# but WITHOUT ANY WARRANTY; without even the implied warranty of
13		# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14		# GNU General Public License for more details.
15		#
16		# You should have received a copy of the GNU General Public License
17		# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19	1	"""abydos.distance._lcsseq.
20
21		Longest common subsequence
22		"""
23
24	1	from __future__ import (
25		absolute_import,
26		division,
27		print_function,
28		unicode_literals,
29		)
30
31	1	from numpy import int as np_int
32	1	from numpy import zeros as np_zeros
33
34	1	from ._distance import _Distance
35
36	1	__all__ = ['LCSseq', 'dist_lcsseq', 'lcsseq', 'sim_lcsseq']
37
38
39	1	class LCSseq(_Distance):
		0 ignored issues – show Unused Code introduced 2018-11-10 01:42 UTC by Report Bug Copy Issue Report The variable `__class__` seems to be unused. Loading history...
40		"""Longest common subsequence.
41
42		Longest common subsequence (LCSseq) is the longest subsequence of
43		characters that two strings have in common.
44		"""
45
46	1	def lcsseq(self, src, tar):
		0 ignored issues – show Coding Style introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report This method could be written as a function/class method. If a method does not access any attributes of the class, it could also be implemented as a function or static method. This can help improve readability. For example class Foo: def some_method(self, x, y): return x + y; could be written as class Foo: @classmethod def some_method(cls, x, y): return x + y; Loading history...
47		"""Return the longest common subsequence of two strings.
48
49		Based on the dynamic programming algorithm from
50		http://rosettacode.org/wiki/Longest_common_subsequence
51		:cite:`rosettacode:2018b`. This is licensed GFDL 1.2.
52
53		Modifications include:
54		conversion to a numpy array in place of a list of lists
55
56		Parameters
57		----------
58		src : str
59		Source string for comparison
60		tar : str
61		Target string for comparison
62
63		Returns
64		-------
65		str
66		The longest common subsequence
67
68		Examples
69		--------
70		>>> sseq = LCSseq()
71		>>> sseq.lcsseq('cat', 'hat')
72		'at'
73		>>> sseq.lcsseq('Niall', 'Neil')
74		'Nil'
75		>>> sseq.lcsseq('aluminum', 'Catalan')
76		'aln'
77		>>> sseq.lcsseq('ATCG', 'TAGC')
78		'AC'
79
80		"""
81	1	lengths = np_zeros((len(src) + 1, len(tar) + 1), dtype=np_int)
82
83		# row 0 and column 0 are initialized to 0 already
84	1	for i, src_char in enumerate(src):
85	1	for j, tar_char in enumerate(tar):
86	1	if src_char == tar_char:
87	1	lengths[i + 1, j + 1] = lengths[i, j] + 1
88		else:
89	1	lengths[i + 1, j + 1] = max(
90		lengths[i + 1, j], lengths[i, j + 1]
91		)
92
93		# read the substring out from the matrix
94	1	result = ''
95	1	i, j = len(src), len(tar)
96	1	while i != 0 and j != 0:
97	1	if lengths[i, j] == lengths[i - 1, j]:
98	1	i -= 1
99	1	elif lengths[i, j] == lengths[i, j - 1]:
100	1	j -= 1
101		else:
102	1	result = src[i - 1] + result
103	1	i -= 1
104	1	j -= 1
105	1	return result
106
107	1	def sim(self, src, tar):
		0 ignored issues – show Bug introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Parameters differ from overridden 'sim' method Loading history...
108		r"""Return the longest common subsequence similarity of two strings.
109
110		Longest common subsequence similarity (:math:`sim_{LCSseq}`).
111
112		This employs the LCSseq function to derive a similarity metric:
113		:math:`sim_{LCSseq}(s,t) = \frac{\|LCSseq(s,t)\|}{max(\|s\|, \|t\|)}`
114
115		Parameters
116		----------
117		src : str
118		Source string for comparison
119		tar : str
120		Target string for comparison
121
122		Returns
123		-------
124		float
125		LCSseq similarity
126
127		Examples
128		--------
129		>>> sseq = LCSseq()
130		>>> sseq.sim('cat', 'hat')
131		0.6666666666666666
132		>>> sseq.sim('Niall', 'Neil')
133		0.6
134		>>> sseq.sim('aluminum', 'Catalan')
135		0.375
136		>>> sseq.sim('ATCG', 'TAGC')
137		0.5
138
139		"""
140	1	if src == tar:
141	1	return 1.0
142	1	elif not src or not tar:
143	1	return 0.0
144	1	return len(self.lcsseq(src, tar)) / max(len(src), len(tar))
145
146
147	1	def lcsseq(src, tar):
148		"""Return the longest common subsequence of two strings.
149
150		This is a wrapper for :py:meth:`LCSseq.lcsseq`.
151
152		Parameters
153		----------
154		src : str
155		Source string for comparison
156		tar : str
157		Target string for comparison
158
159		Returns
160		-------
161		str
162		The longest common subsequence
163
164		Examples
165		--------
166		>>> lcsseq('cat', 'hat')
167		'at'
168		>>> lcsseq('Niall', 'Neil')
169		'Nil'
170		>>> lcsseq('aluminum', 'Catalan')
171		'aln'
172		>>> lcsseq('ATCG', 'TAGC')
173		'AC'
174
175		"""
176	1	return LCSseq().lcsseq(src, tar)
177
178
179	1	def sim_lcsseq(src, tar):
180		r"""Return the longest common subsequence similarity of two strings.
181
182		This is a wrapper for :py:meth:`LCSseq.sim`.
183
184		Parameters
185		----------
186		src : str
187		Source string for comparison
188		tar : str
189		Target string for comparison
190
191		Returns
192		-------
193		float
194		LCSseq similarity
195
196		Examples
197		--------
198		>>> sim_lcsseq('cat', 'hat')
199		0.6666666666666666
200		>>> sim_lcsseq('Niall', 'Neil')
201		0.6
202		>>> sim_lcsseq('aluminum', 'Catalan')
203		0.375
204		>>> sim_lcsseq('ATCG', 'TAGC')
205		0.5
206
207		"""
208	1	return LCSseq().sim(src, tar)
209
210
211	1	def dist_lcsseq(src, tar):
212		"""Return the longest common subsequence distance between two strings.
213
214		This is a wrapper for :py:meth:`LCSseq.dist`.
215
216		Parameters
217		----------
218		src : str
219		Source string for comparison
220		tar : str
221		Target string for comparison
222
223		Returns
224		-------
225		float
226		LCSseq distance
227
228		Examples
229		--------
230		>>> dist_lcsseq('cat', 'hat')
231		0.33333333333333337
232		>>> dist_lcsseq('Niall', 'Neil')
233		0.4
234		>>> dist_lcsseq('aluminum', 'Catalan')
235		0.625
236		>>> dist_lcsseq('ATCG', 'TAGC')
237		0.5
238
239		"""
240	1	return LCSseq().dist(src, tar)
241
242
243		if __name__ == '__main__':
244		import doctest
245
246		doctest.testmod()
247

chrislit / abydos

Pull Request — master (#141)

abydos.distance._lcsseq A

Complexity

Size/Duplication

Test Coverage

Importance

3 Functions

2 Methods

Duplication Side-by-Side

Filter issues like