abydos.fingerprint._Synoname.SynonameToolcode.fingerprint() - Code Metrics - Inspection of "0.3.6" - chrislit/abydos - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Pull Request — master (#141)

by Chris

created 2018-11-10 01:31 UTC

SynonameToolcode.fingerprint() F

↳ Parent: abydos.fingerprint._Synoname

Complexity

Conditions

Size

Total Lines	209
Code Lines	128

Duplication

Lines	0
Ratio	0 %

Code Coverage

Tests	110
CRAP Score	51

Importance

Changes

Metric	Value
cc	51
eloc	128
nop	5
dl	0
loc	209
ccs	110
cts	110
cp	1
crap	51
rs	0
c	0
b	0
f	0

How to fix Long Method Complexity

# -*- coding: utf-8 -*-


# Copyright 2018 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.fingerprint._Synoname.

Synoname toolcode.
"""

from __future__ import (
    absolute_import,
    division,
    print_function,
    unicode_literals,
)

from ._Fingerprint import Fingerprint

__all__ = ['SynonameToolcode', 'synoname_toolcode']


class SynonameToolcode(Fingerprint):
    """Synoname Toolcode.

    Cf. :cite:`Getty:1991,Gross:1991`.
    """

    # Table of "special" name parts. Each row is
    # (is_roman_numeral, match_text, extra_text, method_bitmask), where the
    # bitmask is a combination of the values in ``method_dict``.
    # Row order is significant: the row index is written into the toolcode
    # (fields 3 and 7) and rows are tried in order, so longer phrases must
    # precede their own prefixes/suffixes.
    synoname_special_table = (
        # Roman, match, extra, method
        (False, 'NONE', '', 0),
        (False, 'aine', '', 3),
        (False, 'also erroneously', '', 4),
        (False, 'also identified with the', '', 2),
        (False, 'also identified with', '', 2),
        (False, 'archbishop', '', 7),
        (False, 'atelier', '', 7),
        (False, 'baron', '', 7),
        (False, 'cadet', '', 3),
        (False, 'cardinal', '', 7),
        (False, 'circle of', '', 5),
        (False, 'circle', '', 5),
        (False, 'class of', '', 5),
        (False, 'conde de', '', 7),
        (False, 'countess', '', 7),
        (False, 'count', '', 7),
        (False, "d'", " d'", 15),
        (False, 'dai', '', 15),
        (False, "dall'", " dall'", 15),
        (False, 'dalla', '', 15),
        (False, 'dalle', '', 15),
        (False, 'dal', '', 15),
        (False, 'da', '', 15),
        (False, 'degli', '', 15),
        (False, 'della', '', 15),
        (False, 'del', '', 15),
        (False, 'den', '', 15),
        (False, 'der altere', '', 3),
        (False, 'der jungere', '', 3),
        (False, 'der', '', 15),
        (False, 'de la', '', 15),
        (False, 'des', '', 15),
        (False, "de'", " de'", 15),
        (False, 'de', '', 15),
        (False, 'di ser', '', 7),
        (False, 'di', '', 15),
        (False, 'dos', '', 15),
        (False, 'du', '', 15),
        (False, 'duke of', '', 7),
        (False, 'earl of', '', 7),
        (False, 'el', '', 15),
        (False, 'fils', '', 3),
        (False, 'florentine follower of', '', 5),
        (False, 'follower of', '', 5),
        (False, 'fra', '', 7),
        (False, 'freiherr von', '', 7),
        (False, 'giovane', '', 7),
        (False, 'group', '', 5),
        (True, 'iii', '', 3),
        (True, 'ii', '', 3),
        (False, 'il giovane', '', 7),
        (False, 'il vecchio', '', 7),
        (False, 'il', '', 15),
        (False, "in't", '', 7),
        (False, 'in het', '', 7),
        (True, 'iv', '', 3),
        (True, 'ix', '', 3),
        (True, 'i', '', 3),
        (False, 'jr.', '', 3),
        (False, 'jr', '', 3),
        (False, 'juniore', '', 3),
        (False, 'junior', '', 3),
        (False, 'king of', '', 7),
        (False, "l'", " l'", 15),
        (False, "l'aine", '', 3),
        (False, 'la', '', 15),
        (False, 'le jeune', '', 3),
        (False, 'le', '', 15),
        (False, 'lo', '', 15),
        (False, 'maestro', '', 7),
        (False, 'maitre', '', 7),
        (False, 'marchioness', '', 7),
        (False, 'markgrafin von', '', 7),
        (False, 'marquess', '', 7),
        (False, 'marquis', '', 7),
        (False, 'master of the', '', 7),
        (False, 'master of', '', 7),
        (False, 'master known as the', '', 7),
        (False, 'master with the', '', 7),
        (False, 'master with', '', 7),
        (False, 'masters', '', 7),
        (False, 'master', '', 7),
        (False, 'meister', '', 7),
        (False, 'met de', '', 7),
        (False, 'met', '', 7),
        (False, 'mlle.', '', 7),
        (False, 'mlle', '', 7),
        (False, 'monogrammist', '', 7),
        (False, 'monsu', '', 7),
        (False, 'nee', '', 2),
        (False, 'of', '', 3),
        (False, 'oncle', '', 3),
        (False, 'op den', '', 15),
        (False, 'op de', '', 15),
        (False, 'or', '', 2),
        (False, 'over den', '', 15),
        (False, 'over de', '', 15),
        (False, 'over', '', 7),
        (False, 'p.re', '', 7),
        (False, 'p.r.a.', '', 1),
        (False, 'padre', '', 7),
        (False, 'painter', '', 7),
        (False, 'pere', '', 3),
        (False, 'possibly identified with', '', 6),
        (False, 'possibly', '', 6),
        (False, 'pseudo', '', 15),
        (False, 'r.a.', '', 1),
        (False, 'reichsgraf von', '', 7),
        (False, 'ritter von', '', 7),
        (False, 'sainte-', ' sainte-', 8),
        (False, 'sainte', '', 7),
        (False, 'saint-', ' saint-', 8),
        (False, 'saint', '', 7),
        (False, 'santa', '', 15),
        (False, "sant'", " sant'", 15),
        (False, 'san', '', 15),
        (False, 'ser', '', 7),
        (False, 'seniore', '', 3),
        (False, 'senior', '', 3),
        (False, 'sir', '', 5),
        (False, 'sr.', '', 3),
        (False, 'sr', '', 3),
        (False, 'ss.', ' ss.', 14),
        (False, 'ss', '', 6),
        (False, 'st-', ' st-', 8),
        (False, 'st.', ' st.', 15),
        (False, 'ste-', ' ste-', 8),
        (False, 'ste.', ' ste.', 15),
        (False, 'studio', '', 7),
        (False, 'sub-group', '', 5),
        (False, 'sultan of', '', 7),
        (False, 'ten', '', 15),
        (False, 'ter', '', 15),
        (False, 'the elder', '', 3),
        (False, 'the younger', '', 3),
        (False, 'the', '', 7),
        (False, 'tot', '', 15),
        (False, 'unidentified', '', 1),
        (False, 'van den', '', 15),
        (False, 'van der', '', 15),
        (False, 'van de', '', 15),
        (False, 'vanden', '', 15),
        (False, 'vander', '', 15),
        (False, 'van', '', 15),
        (False, 'vecchia', '', 7),
        (False, 'vecchio', '', 7),
        (True, 'viii', '', 3),
        (True, 'vii', '', 3),
        (True, 'vi', '', 3),
        (True, 'v', '', 3),
        (False, 'vom', '', 7),
        (False, 'von', '', 15),
        (False, 'workshop', '', 7),
        (True, 'xiii', '', 3),
        (True, 'xii', '', 3),
        (True, 'xiv', '', 3),
        (True, 'xix', '', 3),
        (True, 'xi', '', 3),
        (True, 'xviii', '', 3),
        (True, 'xvii', '', 3),
        (True, 'xvi', '', 3),
        (True, 'xv', '', 3),
        (True, 'xx', '', 3),
        (True, 'x', '', 3),
        (False, 'y', '', 7),
    )

    # Bit flags used in the last column of ``synoname_special_table``; each
    # flag names a position in the full name where the match is looked for.
    method_dict = {
        'end': 1,
        'middle': 2,
        'beginning': 4,
        'beginning_no_space': 8,
    }

    # Qualifier phrases, grouped by the digit they produce in field 0
    qual_3 = {
        'adaptation after',
        'after',
        'assistant of',
        'assistants of',
        'circle of',
        'follower of',
        'imitator of',
        'in the style of',
        'manner of',
        'pupil of',
        'school of',
        'studio of',
        'style of',
        'workshop of',
    }
    qual_2 = {'copy after', 'copy after?', 'copy of'}
    qual_1 = {
        'ascribed to',
        'attributed to or copy after',
        'attributed to',
        'possibly',
    }

    # Generation markers, grouped by the digit they produce in field 2.
    # These are tuples because they are scanned in order and the first hit
    # wins.
    gen_1 = (
        'the elder',
        ' sr.',
        ' sr',
        'senior',
        'der altere',
        'il vecchio',
        "l'aine",
        'p.re',
        'padre',
        'seniore',
        'vecchia',
        'vecchio',
    )
    gen_2 = (
        ' jr.',
        ' jr',
        'der jungere',
        'il giovane',
        'giovane',
        'juniore',
        'junior',
        'le jeune',
        'the younger',
    )

    @staticmethod
    def _has_roman_abbreviation(fname):
        """Return True if fname contains 'i.', 'v.', or 'x.'.

        When one of these is present, a Roman-numeral row of the special
        table is not treated as a match.

        Args:
            fname (str): First name

        Returns:
            bool: Whether any of the three abbreviations occurs in fname

        """
        return any(abbr in fname for abbr in ('i.', 'v.', 'x.'))

    @staticmethod
    def _roman_check(numeral, fname, lname):
        """Move a Roman numeral from the first name to the last name.

        The numeral is moved only if it actually occurs in fname and is
        followed there by the end of the string, a space, or a comma. Only
        the first occurrence found by ``str.find`` is examined.

        Args:
            numeral (str): Roman numeral
            fname (str): First name
            lname (str): Last name

        Returns:
            tuple: First and last names (both stripped of surrounding
                whitespace) with the Roman numeral moved if applicable

        """
        loc = fname.find(numeral)
        # Requiring loc != -1 up front prevents indexing fname relative to
        # a failed search, which could raise IndexError or wrongly append
        # the numeral to lname when it was never part of fname.
        if loc != -1:
            end = loc + len(numeral)
            if end == len(fname) or fname[end] in {' ', ','}:
                lname = ' '.join((lname, numeral))
                fname = ' '.join(
                    (fname[:loc].strip(), fname[end:].lstrip(' ,'))
                )
        return fname.strip(), lname.strip()

    def fingerprint(self, lname, fname='', qual='', normalize=0):
        """Build the Synoname toolcode.

        The toolcode is the concatenation of ten fields: qualifier (0),
        punctuation (1), generation (2), first Roman numeral row index (3),
        length of the first name (4), length of the last name (5), a ``$``
        separator, the list of matched specials (7), a second ``$``
        separator, and the search range (9).

        Args:
            lname (str): Last name
            fname (str): First name (can be blank)
            qual (str): Qualifier
            normalize (int): Normalization mode (0, 1, or 2). Any non-zero
                value moves the part of lname after its first comma to the
                front of fname; 2 additionally moves elder/younger markers
                and Roman numerals from fname to the end of lname.

        Returns:
            tuple: The transformed names and the synoname toolcode

        Examples:
            >>> st = SynonameToolcode()
            >>> st.fingerprint('hat')
            ('hat', '', '0000000003$$h')
            >>> st.fingerprint('niall')
            ('niall', '', '0000000005$$n')
            >>> st.fingerprint('colin')
            ('colin', '', '0000000005$$c')
            >>> st.fingerprint('atcg')
            ('atcg', '', '0000000004$$a')
            >>> st.fingerprint('entreatment')
            ('entreatment', '', '0000000011$$e')

            >>> st.fingerprint('Ste.-Marie', 'Count John II', normalize=2)
            ('ste.-marie ii', 'count john', '0200491310$015b049a127c$smcji')
            >>> st.fingerprint('Michelangelo IV', '', 'Workshop of')
            ('michelangelo iv', '', '3000550015$055b$mi')

        """
        lname = lname.lower()
        fname = fname.lower()
        qual = qual.lower()

        # Start with the basic code
        toolcode = ['0', '0', '0', '000', '00', '00', '$', '', '$', '']

        full_name = ' '.join((lname, fname))

        # Fill field 0 (qualifier)
        if qual in self.qual_3:
            toolcode[0] = '3'
        elif qual in self.qual_2:
            toolcode[0] = '2'
        elif qual in self.qual_1:
            toolcode[0] = '1'

        # Fill field 1 (punctuation): a period outranks any other mark
        other_punctuation = ',-/:;"&\'()!{|}?$%*+<=>[\\]^_`~'
        if '.' in full_name:
            toolcode[1] = '2'
        elif any(punct in full_name for punct in other_punctuation):
            toolcode[1] = '1'

        # Fill field 2 (generation)
        elderyounger = ''  # save elder/younger for possible movement later
        for gen in self.gen_1:
            if gen in full_name:
                toolcode[2] = '1'
                elderyounger = gen
                break
        else:
            for gen in self.gen_2:
                if gen in full_name:
                    toolcode[2] = '2'
                    elderyounger = gen
                    break

        # do comma flip
        if normalize:
            comma = lname.find(',')
            if comma != -1:
                # lstrip tolerates a last name with nothing but spaces or
                # commas after the comma (e.g. 'smith,'), which would
                # otherwise mean indexing into an empty string.
                lname_end = lname[comma + 1 :].lstrip(' ,')
                if lname_end:
                    fname = lname_end + ' ' + fname
                lname = lname[:comma].strip()

        # do elder/younger move
        if normalize == 2 and elderyounger:
            elderyounger_loc = fname.find(elderyounger)
            if elderyounger_loc != -1:
                lname = ' '.join((lname, elderyounger.strip()))
                fname = ' '.join(
                    (
                        fname[:elderyounger_loc].strip(),
                        fname[elderyounger_loc + len(elderyounger) :],
                    )
                ).strip()

        # Fill fields 4 and 5 (name lengths after normalization)
        toolcode[4] = '{:02d}'.format(len(fname))
        toolcode[5] = '{:02d}'.format(len(lname))

        # strip punctuation (periods and apostrophes are kept)
        for char in ',/:;"&()!{|}?$%*+<=>[\\]^_`~':
            full_name = full_name.replace(char, '')
        # Hyphens become spaces, except in the sequence 'b-g'. The
        # replacement is the same length, so positions taken from the
        # string being enumerated stay valid for the rebuilt one.
        for pos, char in enumerate(full_name):
            if char == '-' and full_name[pos - 1 : pos + 2] != 'b-g':
                full_name = full_name[:pos] + ' ' + full_name[pos + 1 :]

        # Fill field 9 (search range) with the distinct word initials
        for letter in [_[0] for _ in full_name.split()]:
            if letter not in toolcode[9]:
                toolcode[9] += letter
            if len(toolcode[9]) == 15:
                break

        # Fill fields 7 (specials) and 3 (roman numerals)
        for num, special in enumerate(self.synoname_special_table):
            roman, match, extra, method = special
            code = '{:03d}'.format(num)

            if method & self.method_dict['end']:
                match_context = ' ' + match
                loc = full_name.find(match_context)
                if (
                    len(full_name) > len(match_context)
                    and loc == len(full_name) - len(match_context)
                    and not (roman and self._has_roman_abbreviation(fname))
                ):
                    full_name = full_name[:loc]
                    toolcode[7] += code + 'a'
                    if roman:
                        # Only the first Roman numeral found is recorded
                        if toolcode[3] == '000':
                            toolcode[3] = code
                        if normalize == 2:
                            fname, lname = self._roman_check(
                                match, fname, lname
                            )

            if method & self.method_dict['middle']:
                match_context = ' ' + match + ' '
                loc = 0
                while loc != -1:
                    loc = full_name.find(match_context, loc + 1)
                    if loc > 0 and not (
                        roman and self._has_roman_abbreviation(fname)
                    ):
                        # Drop the match and one of its bounding spaces
                        full_name = (
                            full_name[:loc]
                            + full_name[loc + len(match) + 1 :]
                        )
                        toolcode[7] += code + 'b'
                        if roman:
                            if toolcode[3] == '000':
                                toolcode[3] = code
                            if normalize == 2:
                                fname, lname = self._roman_check(
                                    match, fname, lname
                                )

            if method & self.method_dict['beginning']:
                match_context = match + ' '
                loc = full_name.find(match_context)
                if loc == 0:
                    full_name = full_name[len(match) + 1 :]
                    toolcode[7] += code + 'c'

            if method & self.method_dict['beginning_no_space']:
                loc = full_name.find(match)
                if loc == 0:
                    toolcode[7] += code + 'd'
                    if full_name[: len(match)] not in toolcode[9]:
                        toolcode[9] += full_name[: len(match)]

            if extra:
                loc = full_name.find(extra)
                if loc != -1:
                    toolcode[7] += code + 'X'
                    # Since extras are unique, we only look for each of them
                    # once, and they include otherwise impossible characters
                    # for this field, it's not possible for the following line
                    # to have ever been false.
                    # if full_name[loc:loc+len(extra)] not in toolcode[9]:
                    # NOTE(review): the slice length is len(match), one
                    # character shorter than extra -- confirm this is
                    # intended before changing it.
                    toolcode[9] += full_name[loc : loc + len(match)]

        return lname, fname, ''.join(toolcode)


def synoname_toolcode(lname, fname='', qual='', normalize=0):
    """Return the transformed names and Synoname toolcode for a name.

    Convenience function: it creates a :py:class:`SynonameToolcode` and
    delegates to :py:meth:`SynonameToolcode.fingerprint`.

    Args:
        lname (str): Last name
        fname (str): First name (can be blank)
        qual (str): Qualifier
        normalize (int): Normalization mode (0, 1, or 2)

    Returns:
        tuple: The transformed names and the synoname toolcode

    Examples:
        >>> synoname_toolcode('hat')
        ('hat', '', '0000000003$$h')
        >>> synoname_toolcode('niall')
        ('niall', '', '0000000005$$n')
        >>> synoname_toolcode('colin')
        ('colin', '', '0000000005$$c')
        >>> synoname_toolcode('atcg')
        ('atcg', '', '0000000004$$a')
        >>> synoname_toolcode('entreatment')
        ('entreatment', '', '0000000011$$e')

        >>> synoname_toolcode('Ste.-Marie', 'Count John II', normalize=2)
        ('ste.-marie ii', 'count john', '0200491310$015b049a127c$smcji')
        >>> synoname_toolcode('Michelangelo IV', '', 'Workshop of')
        ('michelangelo iv', '', '3000550015$055b$mi')

    """
    encoder = SynonameToolcode()
    return encoder.fingerprint(
        lname, fname=fname, qual=qual, normalize=normalize
    )


if __name__ == '__main__':
    # When run as a script, execute the doctest examples embedded in this
    # module's docstrings.
    import doctest

    doctest.testmod()


1		# -- coding: utf-8 --
		Coding Style (Naming), introduced 2018-11-10 01:42 UTC: The name `_Synoname` does not conform to the module naming conventions (`(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$`). This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site.
2
3		# Copyright 2018 by Christopher C. Little.
4		# This file is part of Abydos.
5		#
6		# Abydos is free software: you can redistribute it and/or modify
7		# it under the terms of the GNU General Public License as published by
8		# the Free Software Foundation, either version 3 of the License, or
9		# (at your option) any later version.
10		#
11		# Abydos is distributed in the hope that it will be useful,
12		# but WITHOUT ANY WARRANTY; without even the implied warranty of
13		# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14		# GNU General Public License for more details.
15		#
16		# You should have received a copy of the GNU General Public License
17		# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19	1	"""abydos.fingerprint._Synoname.
20
21		Synoname toolcode.
22		"""
23
24	1	from __future__ import (
25		absolute_import,
26		division,
27		print_function,
28		unicode_literals,
29		)
30
31	1	from ._Fingerprint import Fingerprint
32
33	1	__all__ = ['SynonameToolcode', 'synoname_toolcode']
34
35
36	1	class SynonameToolcode(Fingerprint):
		0 ignored issues – show Unused Code introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report The variable `__class__` seems to be unused. Loading history...
37		"""Synoname Toolcode.
38
39		Cf. :cite:`Getty:1991,Gross:1991`.
40		"""
41
42	1	synoname_special_table = (
43		# Roman, match, extra, method
44		(False, 'NONE', '', 0),
45		(False, 'aine', '', 3),
46		(False, 'also erroneously', '', 4),
47		(False, 'also identified with the', '', 2),
48		(False, 'also identified with', '', 2),
49		(False, 'archbishop', '', 7),
50		(False, 'atelier', '', 7),
51		(False, 'baron', '', 7),
52		(False, 'cadet', '', 3),
53		(False, 'cardinal', '', 7),
54		(False, 'circle of', '', 5),
55		(False, 'circle', '', 5),
56		(False, 'class of', '', 5),
57		(False, 'conde de', '', 7),
58		(False, 'countess', '', 7),
59		(False, 'count', '', 7),
60		(False, "d'", " d'", 15),
61		(False, 'dai', '', 15),
62		(False, "dall'", " dall'", 15),
63		(False, 'dalla', '', 15),
64		(False, 'dalle', '', 15),
65		(False, 'dal', '', 15),
66		(False, 'da', '', 15),
67		(False, 'degli', '', 15),
68		(False, 'della', '', 15),
69		(False, 'del', '', 15),
70		(False, 'den', '', 15),
71		(False, 'der altere', '', 3),
72		(False, 'der jungere', '', 3),
73		(False, 'der', '', 15),
74		(False, 'de la', '', 15),
75		(False, 'des', '', 15),
76		(False, "de'", " de'", 15),
77		(False, 'de', '', 15),
78		(False, 'di ser', '', 7),
79		(False, 'di', '', 15),
80		(False, 'dos', '', 15),
81		(False, 'du', '', 15),
82		(False, 'duke of', '', 7),
83		(False, 'earl of', '', 7),
84		(False, 'el', '', 15),
85		(False, 'fils', '', 3),
86		(False, 'florentine follower of', '', 5),
87		(False, 'follower of', '', 5),
88		(False, 'fra', '', 7),
89		(False, 'freiherr von', '', 7),
90		(False, 'giovane', '', 7),
91		(False, 'group', '', 5),
92		(True, 'iii', '', 3),
93		(True, 'ii', '', 3),
94		(False, 'il giovane', '', 7),
95		(False, 'il vecchio', '', 7),
96		(False, 'il', '', 15),
97		(False, "in't", '', 7),
98		(False, 'in het', '', 7),
99		(True, 'iv', '', 3),
100		(True, 'ix', '', 3),
101		(True, 'i', '', 3),
102		(False, 'jr.', '', 3),
103		(False, 'jr', '', 3),
104		(False, 'juniore', '', 3),
105		(False, 'junior', '', 3),
106		(False, 'king of', '', 7),
107		(False, "l'", " l'", 15),
108		(False, "l'aine", '', 3),
109		(False, 'la', '', 15),
110		(False, 'le jeune', '', 3),
111		(False, 'le', '', 15),
112		(False, 'lo', '', 15),
113		(False, 'maestro', '', 7),
114		(False, 'maitre', '', 7),
115		(False, 'marchioness', '', 7),
116		(False, 'markgrafin von', '', 7),
117		(False, 'marquess', '', 7),
118		(False, 'marquis', '', 7),
119		(False, 'master of the', '', 7),
120		(False, 'master of', '', 7),
121		(False, 'master known as the', '', 7),
122		(False, 'master with the', '', 7),
123		(False, 'master with', '', 7),
124		(False, 'masters', '', 7),
125		(False, 'master', '', 7),
126		(False, 'meister', '', 7),
127		(False, 'met de', '', 7),
128		(False, 'met', '', 7),
129		(False, 'mlle.', '', 7),
130		(False, 'mlle', '', 7),
131		(False, 'monogrammist', '', 7),
132		(False, 'monsu', '', 7),
133		(False, 'nee', '', 2),
134		(False, 'of', '', 3),
135		(False, 'oncle', '', 3),
136		(False, 'op den', '', 15),
137		(False, 'op de', '', 15),
138		(False, 'or', '', 2),
139		(False, 'over den', '', 15),
140		(False, 'over de', '', 15),
141		(False, 'over', '', 7),
142		(False, 'p.re', '', 7),
143		(False, 'p.r.a.', '', 1),
144		(False, 'padre', '', 7),
145		(False, 'painter', '', 7),
146		(False, 'pere', '', 3),
147		(False, 'possibly identified with', '', 6),
148		(False, 'possibly', '', 6),
149		(False, 'pseudo', '', 15),
150		(False, 'r.a.', '', 1),
151		(False, 'reichsgraf von', '', 7),
152		(False, 'ritter von', '', 7),
153		(False, 'sainte-', ' sainte-', 8),
154		(False, 'sainte', '', 7),
155		(False, 'saint-', ' saint-', 8),
156		(False, 'saint', '', 7),
157		(False, 'santa', '', 15),
158		(False, "sant'", " sant'", 15),
159		(False, 'san', '', 15),
160		(False, 'ser', '', 7),
161		(False, 'seniore', '', 3),
162		(False, 'senior', '', 3),
163		(False, 'sir', '', 5),
164		(False, 'sr.', '', 3),
165		(False, 'sr', '', 3),
166		(False, 'ss.', ' ss.', 14),
167		(False, 'ss', '', 6),
168		(False, 'st-', ' st-', 8),
169		(False, 'st.', ' st.', 15),
170		(False, 'ste-', ' ste-', 8),
171		(False, 'ste.', ' ste.', 15),
172		(False, 'studio', '', 7),
173		(False, 'sub-group', '', 5),
174		(False, 'sultan of', '', 7),
175		(False, 'ten', '', 15),
176		(False, 'ter', '', 15),
177		(False, 'the elder', '', 3),
178		(False, 'the younger', '', 3),
179		(False, 'the', '', 7),
180		(False, 'tot', '', 15),
181		(False, 'unidentified', '', 1),
182		(False, 'van den', '', 15),
183		(False, 'van der', '', 15),
184		(False, 'van de', '', 15),
185		(False, 'vanden', '', 15),
186		(False, 'vander', '', 15),
187		(False, 'van', '', 15),
188		(False, 'vecchia', '', 7),
189		(False, 'vecchio', '', 7),
190		(True, 'viii', '', 3),
191		(True, 'vii', '', 3),
192		(True, 'vi', '', 3),
193		(True, 'v', '', 3),
194		(False, 'vom', '', 7),
195		(False, 'von', '', 15),
196		(False, 'workshop', '', 7),
197		(True, 'xiii', '', 3),
198		(True, 'xii', '', 3),
199		(True, 'xiv', '', 3),
200		(True, 'xix', '', 3),
201		(True, 'xi', '', 3),
202		(True, 'xviii', '', 3),
203		(True, 'xvii', '', 3),
204		(True, 'xvi', '', 3),
205		(True, 'xv', '', 3),
206		(True, 'xx', '', 3),
207		(True, 'x', '', 3),
208		(False, 'y', '', 7),
209		)
210
211	1	method_dict = {
212		'end': 1,
213		'middle': 2,
214		'beginning': 4,
215		'beginning_no_space': 8,
216		}
217
218		# Fill field 0 (qualifier)
219	1	qual_3 = {
220		'adaptation after',
221		'after',
222		'assistant of',
223		'assistants of',
224		'circle of',
225		'follower of',
226		'imitator of',
227		'in the style of',
228		'manner of',
229		'pupil of',
230		'school of',
231		'studio of',
232		'style of',
233		'workshop of',
234		}
235	1	qual_2 = {'copy after', 'copy after?', 'copy of'}
236	1	qual_1 = {
237		'ascribed to',
238		'attributed to or copy after',
239		'attributed to',
240		'possibly',
241		}
242
243		# Fill field 2 (generation)
244	1	gen_1 = (
245		'the elder',
246		' sr.',
247		' sr',
248		'senior',
249		'der altere',
250		'il vecchio',
251		"l'aine",
252		'p.re',
253		'padre',
254		'seniore',
255		'vecchia',
256		'vecchio',
257		)
258	1	gen_2 = (
259		' jr.',
260		' jr',
261		'der jungere',
262		'il giovane',
263		'giovane',
264		'juniore',
265		'junior',
266		'le jeune',
267		'the younger',
268		)
269
270	1	def fingerprint(self, lname, fname='', qual='', normalize=0):
		Bug, introduced 2018-11-04 08:02 UTC: Parameters differ from overridden 'fingerprint' method. Comprehensibility, introduced 2018-11-04 08:02 UTC: This function exceeds the maximum number of variables (26/15).
271		"""Build the Synoname toolcode.
272
273		Args:
274		lname (str): Last name
275		fname (str): First name (can be blank)
276		qual (str): Qualifier
277		normalize (int): Normalization mode (0, 1, or 2)
278
279		Returns:
280		tuple: The transformed names and the synoname toolcode
281
282		Examples:
283		>>> st = SynonameToolcode()
284		>>> st.fingerprint('hat')
285		('hat', '', '0000000003$$h')
286		>>> st.fingerprint('niall')
287		('niall', '', '0000000005$$n')
288		>>> st.fingerprint('colin')
289		('colin', '', '0000000005$$c')
290		>>> st.fingerprint('atcg')
291		('atcg', '', '0000000004$$a')
292		>>> st.fingerprint('entreatment')
293		('entreatment', '', '0000000011$$e')
294
295		>>> st.fingerprint('Ste.-Marie', 'Count John II', normalize=2)
296		('ste.-marie ii', 'count john', '0200491310$015b049a127c$smcji')
297		>>> st.fingerprint('Michelangelo IV', '', 'Workshop of')
298		('michelangelo iv', '', '3000550015$055b$mi')
299
300		"""
301	1	lname = lname.lower()
302	1	fname = fname.lower()
303	1	qual = qual.lower()
304
305		# Start with the basic code
306	1	toolcode = ['0', '0', '0', '000', '00', '00', '$', '', '$', '']
307
308	1	full_name = ' '.join((lname, fname))
309
310	1	if qual in self.qual_3:
311	1	toolcode[0] = '3'
312	1	elif qual in self.qual_2:
313	1	toolcode[0] = '2'
314	1	elif qual in self.qual_1:
315	1	toolcode[0] = '1'
316
317		# Fill field 1 (punctuation)
318	1	if '.' in full_name:
319	1	toolcode[1] = '2'
320		else:
321	1	for punct in ',-/:;"&\'()!{|}?$%*+<=>[\\]^_`~':
322	1	if punct in full_name:
323	1	toolcode[1] = '1'
324	1	break
325
326	1	elderyounger = '' # save elder/younger for possible movement later
327	1	for gen in self.gen_1:
328	1	if gen in full_name:
329	1	toolcode[2] = '1'
330	1	elderyounger = gen
331	1	break
332		else:
333	1	for gen in self.gen_2:
334	1	if gen in full_name:
335	1	toolcode[2] = '2'
336	1	elderyounger = gen
337	1	break
338
339		# do comma flip
340	1	if normalize:
341	1	comma = lname.find(',')
342	1	if comma != -1:
343	1	lname_end = lname[comma + 1 :]
344	1	while lname_end[0] in {' ', ','}:
345	1	lname_end = lname_end[1:]
346	1	fname = lname_end + ' ' + fname
347	1	lname = lname[:comma].strip()
348
349		# do elder/younger move
350	1	if normalize == 2 and elderyounger:
351	1	elderyounger_loc = fname.find(elderyounger)
352	1	if elderyounger_loc != -1:
353	1	lname = ' '.join((lname, elderyounger.strip()))
354	1	fname = ' '.join(
355		(
356		fname[:elderyounger_loc].strip(),
357		fname[elderyounger_loc + len(elderyounger) :],
358		)
359		).strip()
360
361	1	toolcode[4] = '{:02d}'.format(len(fname))
362	1	toolcode[5] = '{:02d}'.format(len(lname))
363
364		# strip punctuation
365	1	for char in ',/:;"&()!{|}?$%*+<=>[\\]^_`~':
366	1	full_name = full_name.replace(char, '')
367	1	for pos, char in enumerate(full_name):
368	1	if char == '-' and full_name[pos - 1 : pos + 2] != 'b-g':
369	1	full_name = full_name[:pos] + ' ' + full_name[pos + 1 :]
370
371		# Fill field 9 (search range)
372	1	for letter in [_[0] for _ in full_name.split()]:
373	1	if letter not in toolcode[9]:
374	1	toolcode[9] += letter
375	1	if len(toolcode[9]) == 15:
376	1	break
377
378	1	def roman_check(numeral, fname, lname):
379		"""Move Roman numerals from first name to last.
380
381		Args:
382		numeral (str): Roman numeral
383		fname (str): First name
384		lname (str): Last name
385
386		Returns:
387		tuple: First and last names with Roman numeral moved
388
389		"""
390	1	loc = fname.find(numeral)
391	1	if fname and (
392		loc != -1
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
393		and (len(fname[loc:]) == len(numeral))
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
394		or fname[loc + len(numeral)] in {' ', ','}
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
395		):
396	1	lname = ' '.join((lname, numeral))
397	1	fname = ' '.join(
398		(
399		fname[:loc].strip(),
400		fname[loc + len(numeral) :].lstrip(' ,'),
401		)
402		)
403	1	return fname.strip(), lname.strip()
404
405		# Fill fields 7 (specials) and 3 (roman numerals)
406	1	for num, special in enumerate(self.synoname_special_table):
		0 ignored issues – show unused-code introduced 2018-09-25 05:30 UTC by Report Bug Copy Issue Report Too many nested blocks (6/5) Loading history... unused-code introduced 2018-10-05 08:54 UTC by Report Bug Copy Issue Report Too many nested blocks (7/5) Loading history...
407	1	roman, match, extra, method = special
408	1	if method & self.method_dict['end']:
409	1	match_context = ' ' + match
410	1	loc = full_name.find(match_context)
411	1	if (len(full_name) > len(match_context)) and (
412		loc == len(full_name) - len(match_context)
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
413		):
414	1	if roman:
415	1	if not any(
416		abbr in fname for abbr in ('i.', 'v.', 'x.')
		0 ignored issues – show Coding Style introduced 2018-10-24 06:00 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
417		):
418	1	full_name = full_name[:loc]
419	1	toolcode[7] += '{:03d}'.format(num) + 'a'
420	1	if toolcode[3] == '000':
421	1	toolcode[3] = '{:03d}'.format(num)
422	1	if normalize == 2:
423	1	fname, lname = roman_check(match, fname, lname)
424		else:
425	1	full_name = full_name[:loc]
426	1	toolcode[7] += '{:03d}'.format(num) + 'a'
427	1	if method & self.method_dict['middle']:
428	1	match_context = ' ' + match + ' '
429	1	loc = 0
430	1	while loc != -1:
431	1	loc = full_name.find(match_context, loc + 1)
432	1	if loc > 0:
433	1	if roman:
434	1	if not any(
435		abbr in fname for abbr in ('i.', 'v.', 'x.')
		0 ignored issues – show Coding Style introduced 2018-11-04 08:02 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
436		):
437	1	full_name = (
438		full_name[:loc]
439		+ full_name[loc + len(match) + 1 :]
440		)
441	1	toolcode[7] += '{:03d}'.format(num) + 'b'
442	1	if toolcode[3] == '000':
443	1	toolcode[3] = '{:03d}'.format(num)
444	1	if normalize == 2:
445	1	fname, lname = roman_check(
446		match, fname, lname
447		)
448		else:
449	1	full_name = (
450		full_name[:loc]
451		+ full_name[loc + len(match) + 1 :]
452		)
453	1	toolcode[7] += '{:03d}'.format(num) + 'b'
454	1	if method & self.method_dict['beginning']:
455	1	match_context = match + ' '
456	1	loc = full_name.find(match_context)
457	1	if loc == 0:
458	1	full_name = full_name[len(match) + 1 :]
459	1	toolcode[7] += '{:03d}'.format(num) + 'c'
460	1	if method & self.method_dict['beginning_no_space']:
461	1	loc = full_name.find(match)
462	1	if loc == 0:
463	1	toolcode[7] += '{:03d}'.format(num) + 'd'
464	1	if full_name[: len(match)] not in toolcode[9]:
465	1	toolcode[9] += full_name[: len(match)]
466
467	1	if extra:
468	1	loc = full_name.find(extra)
469	1	if loc != -1:
470	1	toolcode[7] += '{:03d}'.format(num) + 'X'
471		# Since extras are unique, we only look for each of them
472		# once, and they include otherwise impossible characters
473		# for this field, it's not possible for the following line
474		# to have ever been false.
475		# if full_name[loc:loc+len(extra)] not in toolcode[9]:
476	1	toolcode[9] += full_name[loc : loc + len(match)]
477
478	1	return lname, fname, ''.join(toolcode)
479
480
def synoname_toolcode(lname, fname='', qual='', normalize=0):
    """Build the Synoname toolcode.

    Convenience function that creates a :py:class:`SynonameToolcode`
    instance and delegates to :py:meth:`SynonameToolcode.fingerprint`.

    Args:
        lname (str): Last name
        fname (str): First name (can be blank)
        qual (str): Qualifier
        normalize (int): Normalization mode (0, 1, or 2)

    Returns:
        tuple: The transformed names and the synoname toolcode

    Examples:
        >>> synoname_toolcode('hat')
        ('hat', '', '0000000003$$h')
        >>> synoname_toolcode('niall')
        ('niall', '', '0000000005$$n')
        >>> synoname_toolcode('colin')
        ('colin', '', '0000000005$$c')
        >>> synoname_toolcode('atcg')
        ('atcg', '', '0000000004$$a')
        >>> synoname_toolcode('entreatment')
        ('entreatment', '', '0000000011$$e')

        >>> synoname_toolcode('Ste.-Marie', 'Count John II', normalize=2)
        ('ste.-marie ii', 'count john', '0200491310$015b049a127c$smcji')
        >>> synoname_toolcode('Michelangelo IV', '', 'Workshop of')
        ('michelangelo iv', '', '3000550015$055b$mi')

    """
    toolcoder = SynonameToolcode()
    result = toolcoder.fingerprint(lname, fname, qual, normalize)
    return result
514
515
if __name__ == '__main__':
    # Executed as a script: run every doctest embedded in this module.
    from doctest import testmod

    testmod()
520

chrislit / abydos

Pull Request — master (#141)

SynonameToolcode.fingerprint() F

Complexity

Size

Duplication

Code Coverage

Importance

How to fix Long Method Complexity

Long Method

Complexity

Duplication Side-by-Side

Filter issues like