abydos.fingerprint._lc_cutter.LCCutter.fingerprint() - Code Metrics - chrislit/abydos - Measure and Improve Code Quality continuously with Scrutinizer

LCCutter.fingerprint() F
last analyzed 2020-12-31 20:10 UTC

↳ Parent: abydos.fingerprint._lc_cutter

Complexity

Conditions

Size

Total Lines	98
Code Lines	50

Duplication

Lines	0
Ratio	0 %

Code Coverage

Tests	47
CRAP Score	21

Importance

Changes

Metric	Value
eloc	50
dl	0
loc	98
ccs	47
cts	47
cp	1
rs	0
c	0
b	0
f	0
cc	21
nop	2
crap	21

How to fix Long Method Complexity

# Copyright 2019-2020 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.fingerprint._lc_cutter.

Library of Congress Cutter table encoding
"""

from ._fingerprint import _Fingerprint

__all__ = ['LCCutter']


class LCCutter(_Fingerprint):
    """Library of Congress Cutter table encoding.

    This is based on the Library of Congress Cutter table encoding scheme, as
    described at https://www.loc.gov/aba/pcc/053/table.html :cite:`LOC:2013`.
    Handling for numerals is not included.

    .. versionadded:: 0.4.1
    """

    # Each table below is an ascending list of threshold letters. A letter's
    # cutter digit is a base value plus the number of thresholds that the
    # letter is strictly greater than.
    _vowels = set('AEIOU')
    _after_initial_vowel = ['C', 'K', 'M', 'O', 'Q', 'R', 'T']
    _after_initial_s = ['C', 'D', 'G', 'L', 'S', 'T', 'U']
    _after_initial_qu = ['D', 'H', 'N', 'Q', 'S', 'X']
    _after_initial_cons = ['D', 'H', 'N', 'Q', 'T', 'X']

    _expansions = ['D', 'H', 'L', 'O', 'S', 'V']

    def __init__(self, max_length: int = 64) -> None:
        """Initialize LCCutter instance.

        Parameters
        ----------
        max_length : int
            The maximum length of the code returned (defaults to 64). Values
            are clamped to the range 2 to 64; -1 selects the maximum of 64.


        .. versionadded:: 0.4.1

        """
        super().__init__()
        # Require a max_length of at least 2 and not more than 64
        if max_length != -1:
            self._max_length = min(max(2, max_length), 64)
        else:
            self._max_length = 64

    @staticmethod
    def _cutter_value(symbol: str, thresholds: list, base: int) -> int:
        """Return base plus the count of leading thresholds below symbol.

        Parameters
        ----------
        symbol : str
            The letter (or empty string) to rank
        thresholds : list
            Threshold letters, scanned in order until one is not exceeded
        base : int
            The value returned when no threshold is exceeded

        Returns
        -------
        int
            The cutter digit for the symbol

        """
        value = base
        for threshold in thresholds:
            if symbol > threshold:
                value += 1
            else:
                break
        return value

    def _first_cutter(self, uc: str) -> tuple:
        """Return the first cutter digit and the index of the last letter used.

        Parameters
        ----------
        uc : str
            The uppercased, letters-only word; must have at least 2 letters

        Returns
        -------
        tuple
            A pair (digit, pos): the first cutter digit and the index of the
            last letter consumed in producing it. Expansion digits are taken
            from the letters after index pos.

        """
        initial, second = uc[0], uc[1]

        if initial in self._vowels:
            return (
                self._cutter_value(second, self._after_initial_vowel, 2),
                1,
            )

        if initial == 'S':
            if second == 'C':
                # 'SC' followed by nothing or by a letter up to 'H' is
                # treated as the two-letter unit 'SCH': it takes digit 3 and
                # consumes one extra letter. Any other 'SC' stays at digit 2.
                if uc[1:3] < 'CI':
                    return 3, 2
                return 2, 1
            return self._cutter_value(second, self._after_initial_s, 2), 1

        if uc[0:2] == 'QU':
            # 'QU' is a unit; the digit comes from the letter after it, which
            # may be absent (the empty slice exceeds no threshold).
            return self._cutter_value(uc[2:3], self._after_initial_qu, 3), 2

        if 'QA' <= uc[0:2] <= 'QT':
            return 2, 1

        return self._cutter_value(second, self._after_initial_cons, 3), 1

    def fingerprint(self, word: str) -> str:
        """Return the Library of Congress Cutter table encoding of a word.

        The word is uppercased and all non-alphabetic characters are dropped
        before encoding. An input with no letters yields the empty string and
        a single letter is returned as-is.

        Parameters
        ----------
        word : str
            The word to fingerprint

        Returns
        -------
        str
            The Library of Congress Cutter table encoding

        Examples
        --------
        >>> cf = LCCutter()
        >>> cf.fingerprint('hat')
        'H38'
        >>> cf.fingerprint('niall')
        'N5355'
        >>> cf.fingerprint('colin')
        'C6556'
        >>> cf.fingerprint('atcg')
        'A834'
        >>> cf.fingerprint('entreatment')
        'E5874386468'


        .. versionadded:: 0.4.1

        """
        # uppercase, letters only
        uc = ''.join(letter for letter in word.upper() if letter.isalpha())

        # length 0 or 1: nothing to encode beyond the letter itself
        if len(uc) < 2:
            return uc

        # length 2+: initial letter followed by the first cutter digit
        first_digit, pos = self._first_cutter(uc)
        code_list = [uc[0], str(first_digit)]

        # length 3+: one expansion digit per remaining letter, up to the limit
        for ch in uc[pos + 1 :]:
            if len(code_list) >= self._max_length:
                break
            code_list.append(str(self._cutter_value(ch, self._expansions, 3)))

        return ''.join(code_list[: self._max_length])


if __name__ == '__main__':
    # When executed as a script, run the doctest examples in this module.
    from doctest import testmod

    testmod()


1		# Copyright 2019-2020 by Christopher C. Little.
2		# This file is part of Abydos.
3		#
4		# Abydos is free software: you can redistribute it and/or modify
5		# it under the terms of the GNU General Public License as published by
6		# the Free Software Foundation, either version 3 of the License, or
7		# (at your option) any later version.
8		#
9		# Abydos is distributed in the hope that it will be useful,
10		# but WITHOUT ANY WARRANTY; without even the implied warranty of
11		# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12		# GNU General Public License for more details.
13		#
14		# You should have received a copy of the GNU General Public License
15		# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
16
17		"""abydos.fingerprint._lc_cutter.
18
19	1	Library of Congress Cutter table encoding
20		"""
21
22		from ._fingerprint import _Fingerprint
23
24	1	__all__ = ['LCCutter']
25
26
27		class LCCutter(_Fingerprint):
28		"""Library of Congress Cutter table encoding.
29
30		This is based on the Library of Congress Cutter table encoding scheme, as
31	1	described at https://www.loc.gov/aba/pcc/053/table.html :cite:`LOC:2013`.
32		Handling for numerals is not included.
33	1
34		.. versionadded:: 0.4.1
35		"""
36	1
37		_vowels = set('AEIOU')
38		_after_initial_vowel = ['C', 'K', 'M', 'O', 'Q', 'R', 'T']
39		_after_initial_s = ['C', 'D', 'G', 'L', 'S', 'T', 'U']
40		_after_initial_qu = ['D', 'H', 'N', 'Q', 'S', 'X']
41		_after_initial_cons = ['D', 'H', 'N', 'Q', 'T', 'X']
42
43		_expansions = ['D', 'H', 'L', 'O', 'S', 'V']
44
45		def __init__(self, max_length: int = 64) -> None:
46	1	"""Initialize LCCutter instance.
47	1
48	1	Parameters
49	1	----------
50	1	max_length : int
51		The length of the code returned (defaults to 64)
52	1
53
54	1	.. versionadded:: 0.4.1
55
56		"""
57		super(LCCutter, self).__init__()
58		# Require a max_length of at least 2 and not more than 64
59		if max_length != -1:
60		self._max_length = min(max(2, max_length), 64)
61		else:
62		self._max_length = 64
63
64		def fingerprint(self, word: str) -> str:
65		"""Return the Library of Congress Cutter table encoding of a word.
66
67	1	Parameters
68	1	----------
69		word : str
70	1	The word to fingerprint
71
72	1	Returns
73		-------
74		str
75		The Library of Congress Cutter table encoding
76
77		Examples
78		--------
79		>>> cf = LCCutter()
80		>>> cf.fingerprint('hat')
81		'H38'
82		>>> cf.fingerprint('niall')
83		'N5355'
84		>>> cf.fingerprint('colin')
85		'C6556'
86		>>> cf.fingerprint('atcg')
87		'A834'
88		>>> cf.fingerprint('entreatment')
89		'E5874386468'
90
91
92		.. versionadded:: 0.4.1
93
94		"""
95		# uppercase
96		uc = ''.join(letter for letter in word.upper() if letter.isalpha())
97
98		if not uc:
99		return ''
100
101		code = uc[0]
102
103		# length 1
104	1	if len(uc) == 1:
105		return code
106	1
107	1	# length 2+
108		code_list = [code]
109	1
110		# first cutter
111		pos = 1
112	1	if uc[0] in self._vowels:
113	1	cval = 2
114		for letter in self._after_initial_vowel:
115		if uc[1] > letter:
116	1	cval += 1
117		else:
118		break
119	1	elif uc[0] == 'S':
120	1	cval = 2
121	1	for letter in self._after_initial_s:
122	1	if uc[1] > letter:
123	1	cval += 1
124	1	elif uc[1] == 'C' and uc[1:3] < 'CI':
125		cval += 1
126	1	pos += 1
127	1	break
128	1	else:
129	1	break
130	1	elif uc[0:2] == 'QU':
131	1	cval = 3
132	1	pos += 1
133	1	for letter in self._after_initial_qu:
134	1	if uc[2:3] > letter:
135	1	cval += 1
136		else:
137	1	break
138	1	elif 'QA' <= uc[0:2] <= 'QT':
139	1	cval = 2
140	1	else:
141	1	cval = 3
142	1	for letter in self._after_initial_cons:
143	1	if uc[1] > letter:
144		cval += 1
145	1	else:
146	1	break
147	1	code_list.append(str(cval))
148
149	1	# length 3+
150	1	for ch in uc[pos + 1 :]:
151	1	if len(code_list) >= self._max_length:
152	1	break
153		cval = 3
154	1	for letter in self._expansions:
155	1	if ch > letter:
156		cval += 1
157		else:
158	1	break
159	1	code_list.append(str(cval))
160	1
161	1	return ''.join(code_list[: self._max_length])
162	1
163	1
164	1	if __name__ == '__main__':
165		import doctest
166	1
167		doctest.testmod()
168

chrislit / abydos

LCCutter.fingerprint() F last analyzed 2020-12-31 20:10 UTC

Complexity

Size

Duplication

Code Coverage

Importance

How to fix Long Method Complexity

Long Method

Complexity

Duplication Side-by-Side

Filter issues like

LCCutter.fingerprint() F
last analyzed 2020-12-31 20:10 UTC