abydos.fingerprint._lc_cutter.LCCutter.fingerprint() - Code Metrics - Inspection of "0.4.1" - chrislit/abydos - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Pull Request — master (#225)

by Chris

created 2019-07-12 05:47 UTC

LCCutter.fingerprint() F

↳ Parent: abydos.fingerprint._lc_cutter

Complexity

Conditions

Size

Total Lines	98
Code Lines	50

Duplication

Lines	0
Ratio	0 %

Code Coverage

Tests	50
CRAP Score	21

Importance

Changes

Metric	Value
eloc	50
dl	0
loc	98
ccs	50
cts	50
cp	1
rs	0
c	0
b	0
f	0
cc	21
nop	2
crap	21

How to fix Long Method Complexity

# -*- coding: utf-8 -*-

# Copyright 2019 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.fingerprint._lc_cutter.

Library of Congress Cutter table encoding
"""

from __future__ import (
    absolute_import,
    division,
    print_function,
    unicode_literals,
)

from ._fingerprint import _Fingerprint

__all__ = ['LCCutter']


class LCCutter(_Fingerprint):
    """Library of Congress Cutter table encoding.

    This is based on the Library of Congress Cutter table encoding scheme, as
    described at https://www.loc.gov/aba/pcc/053/table.html :cite:`LOC:2013`.
    Handling for numerals is not included.

    .. versionadded:: 0.4.1
    """

    _vowels = set('AEIOU')

    # Threshold tables. A table is scanned in order; every entry that the
    # examined letter sorts strictly after adds one to a base digit, and the
    # scan stops at the first entry that is not exceeded.
    _after_initial_vowel = ['C', 'K', 'M', 'O', 'Q', 'R', 'T']
    _after_initial_s = ['C', 'D', 'G', 'L', 'S', 'T', 'U']
    _after_initial_qu = ['D', 'H', 'N', 'Q', 'S', 'X']
    _after_initial_cons = ['D', 'H', 'N', 'Q', 'T', 'X']

    _expansions = ['D', 'H', 'L', 'O', 'S', 'V']

    def __init__(self, max_length=64):
        """Initialize LCCutter instance.

        Parameters
        ----------
        max_length : int
            The maximum length of the code returned (defaults to 64). The
            value is clamped to the range 2..64; -1 is treated as 64.


        .. versionadded:: 0.4.1

        """
        # -1 is the "no explicit limit" value and maps onto the upper bound.
        if max_length == -1:
            max_length = 64
        # Require a max_length of at least 2 and not more than 64
        self._max_length = min(max(2, max_length), 64)

    @staticmethod
    def _steps_past(letters, thresholds):
        """Count the leading thresholds that letters sorts strictly after.

        Parameters
        ----------
        letters : str
            The letter (or empty string) being classified
        thresholds : list
            The threshold table to scan, in order

        Returns
        -------
        int
            The number of entries passed before the first one that is not
            exceeded (0 for an empty string, which exceeds nothing)

        """
        steps = 0
        for threshold in thresholds:
            if not letters > threshold:
                break
            steps += 1
        return steps

    def _first_cutter(self, uc):
        """Return the first Cutter digit and where the expansion starts.

        Parameters
        ----------
        uc : str
            The uppercased, letters-only word; must have at least 2 letters

        Returns
        -------
        tuple
            ``(digit, start)``: the digit following the initial letter and
            the index in ``uc`` of the first letter left for the expansion

        """
        initial, second = uc[0], uc[1]

        if initial in self._vowels:
            return 2 + self._steps_past(second, self._after_initial_vowel), 2

        if initial == 'S':
            # An 'SC' pair sorting before 'SCI' is encoded as 3 and swallows
            # the letter after the 'C' as well.
            # NOTE(review): this matches 'SCA'..'SCH' (and a bare 'SC'), not
            # only 'SCH' -- confirm against the LC table.
            if second == 'C' and uc[1:3] < 'CI':
                return 3, 3
            return 2 + self._steps_past(second, self._after_initial_s), 2

        if uc[0:2] == 'QU':
            # 'QU' counts as one unit, so the third letter picks the digit;
            # uc[2:3] is '' for a bare 'QU', which leaves the digit at 3.
            return 3 + self._steps_past(uc[2:3], self._after_initial_qu), 3

        if 'QA' <= uc[0:2] <= 'QT':
            return 2, 2

        return 3 + self._steps_past(second, self._after_initial_cons), 2

    def _expansion_digit(self, letter):
        """Return the expansion digit for one letter after the first cutter.

        Parameters
        ----------
        letter : str
            A single uppercase letter

        Returns
        -------
        int
            3 plus the number of expansion thresholds the letter passes

        """
        return 3 + self._steps_past(letter, self._expansions)

    def fingerprint(self, word):
        """Return the Library of Congress Cutter table encoding of a word.

        The word is uppercased and stripped of non-alphabetic characters
        first. With no letters left the result is the empty string, and with
        a single letter the result is that letter alone.

        Parameters
        ----------
        word : str
            The word to fingerprint

        Returns
        -------
        str
            The Library of Congress Cutter table encoding

        Examples
        --------
        >>> cf = LCCutter()
        >>> cf.fingerprint('hat')
        'H38'
        >>> cf.fingerprint('niall')
        'N5355'
        >>> cf.fingerprint('colin')
        'C6556'
        >>> cf.fingerprint('atcg')
        'A834'
        >>> cf.fingerprint('entreatment')
        'E5874386468'


        .. versionadded:: 0.4.1

        """
        # uppercase, letters only
        uc = ''.join(letter for letter in word.upper() if letter.isalpha())

        # length 0 or 1: nothing to encode beyond the letter itself
        if len(uc) < 2:
            return uc

        # length 2+: initial letter followed by the first cutter digit
        first_digit, start = self._first_cutter(uc)
        code = [uc[0], str(first_digit)]

        # length 3+: one expansion digit per remaining letter, up to the cap
        for ch in uc[start:]:
            if len(code) >= self._max_length:
                break
            code.append(str(self._expansion_digit(ch)))

        return ''.join(code[: self._max_length])


if __name__ == '__main__':
    # Executed as a script: run the doctest examples embedded in this module.
    from doctest import testmod

    testmod()


1		# -*- coding: utf-8 -*-
2
3		# Copyright 2019 by Christopher C. Little.
4		# This file is part of Abydos.
5		#
6		# Abydos is free software: you can redistribute it and/or modify
7		# it under the terms of the GNU General Public License as published by
8		# the Free Software Foundation, either version 3 of the License, or
9		# (at your option) any later version.
10		#
11		# Abydos is distributed in the hope that it will be useful,
12		# but WITHOUT ANY WARRANTY; without even the implied warranty of
13		# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14		# GNU General Public License for more details.
15		#
16		# You should have received a copy of the GNU General Public License
17		# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19	1	"""abydos.fingerprint._lc_cutter.
20
21		Library of Congress Cutter table encoding
22		"""
23
24	1	from __future__ import (
25		absolute_import,
26		division,
27		print_function,
28		unicode_literals,
29		)
30
31	1	from ._fingerprint import _Fingerprint
32
33	1	__all__ = ['LCCutter']
34
35
36	1	class LCCutter(_Fingerprint):
37		"""Library of Congress Cutter table encoding.
38
39		This is based on the Library of Congress Cutter table encoding scheme, as
40		described at https://www.loc.gov/aba/pcc/053/table.html :cite:`LOC:2013`.
41		Handling for numerals is not included.
42
43		.. versionadded:: 0.4.1
44		"""
45
46	1	_vowels = set('AEIOU')
47	1	_after_initial_vowel = ['C', 'K', 'M', 'O', 'Q', 'R', 'T']
48	1	_after_initial_s = ['C', 'D', 'G', 'L', 'S', 'T', 'U']
49	1	_after_initial_qu = ['D', 'H', 'N', 'Q', 'S', 'X']
50	1	_after_initial_cons = ['D', 'H', 'N', 'Q', 'T', 'X']
51
52	1	_expansions = ['D', 'H', 'L', 'O', 'S', 'V']
53
54	1	def __init__(self, max_length=64):
55		"""Initialize LCCutter instance.
56
57		Parameters
58		----------
59		max_length : int
60		The length of the code returned (defaults to 64)
61
62
63		.. versionadded:: 0.4.1
64
65		"""
66		# Require a max_length of at least 2 and not more than 64
67	1	if max_length != -1:
68	1	self._max_length = min(max(2, max_length), 64)
69		else:
70	1	self._max_length = 64
71
72	1	def fingerprint(self, word):
73		"""Return the Library of Congress Cutter table encoding of a word.
74
75		Parameters
76		----------
77		word : str
78		The word to fingerprint
79
80		Returns
81		-------
82		str
83		The Library of Congress Cutter table encoding
84
85		Examples
86		--------
87		>>> cf = LCCutter()
88		>>> cf.fingerprint('hat')
89		'H38'
90		>>> cf.fingerprint('niall')
91		'N5355'
92		>>> cf.fingerprint('colin')
93		'C6556'
94		>>> cf.fingerprint('atcg')
95		'A834'
96		>>> cf.fingerprint('entreatment')
97		'E5874386468'
98
99
100		.. versionadded:: 0.4.1
101
102		"""
103		# uppercase
104	1	uc = ''.join(letter for letter in word.upper() if letter.isalpha())
105
106	1	if not uc:
107	1	return ''
108
109	1	code = uc[0]
110
111		# length 1
112	1	if len(uc) == 1:
113	1	return code
114
115		# length 2+
116	1	code = [code]
117
118		# first cutter
119	1	pos = 1
120	1	if uc[0] in self._vowels:
121	1	cval = 2
122	1	for letter in self._after_initial_vowel:
123	1	if uc[1] > letter:
124	1	cval += 1
125		else:
126	1	break
127	1	elif uc[0] == 'S':
128	1	cval = 2
129	1	for letter in self._after_initial_s:
130	1	if uc[1] > letter:
131	1	cval += 1
132	1	elif uc[1] == 'C' and uc[1:3] < 'CI':
133	1	cval += 1
134	1	pos += 1
135	1	break
136		else:
137	1	break
138	1	elif uc[0:2] == 'QU':
139	1	cval = 3
140	1	pos += 1
141	1	for letter in self._after_initial_qu:
142	1	if uc[2:3] > letter:
143	1	cval += 1
144		else:
145	1	break
146	1	elif 'QA' <= uc[0:2] <= 'QT':
147	1	cval = 2
148		else:
149	1	cval = 3
150	1	for letter in self._after_initial_cons:
151	1	if uc[1] > letter:
152	1	cval += 1
153		else:
154	1	break
155	1	code.append(str(cval))
156
157		# length 3+
158	1	for ch in uc[pos + 1 :]:
159	1	if len(code) >= self._max_length:
160	1	break
161	1	cval = 3
162	1	for letter in self._expansions:
163	1	if ch > letter:
164	1	cval += 1
165		else:
166	1	break
167	1	code.append(str(cval))
168
169	1	return ''.join(code[: self._max_length])
170
171
172		if __name__ == '__main__':
173		import doctest
174
175		doctest.testmod()
176

chrislit / abydos

Pull Request — master (#225)

LCCutter.fingerprint() F

Complexity

Size

Duplication

Code Coverage

Importance

How to fix Long Method Complexity

Long Method

Complexity

Duplication Side-by-Side

Filter issues like