abydos.fingerprint._synoname.SynonameToolcode.fingerprint() - Code Metrics - Inspection of "started new entry in HISTORY for 0.4.0" - chrislit/abydos - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( 3ac297...afe14d )

by Chris

created 2019-06-01 01:11 UTC

SynonameToolcode.fingerprint() F

↳ Parent: abydos.fingerprint._synoname

Complexity

Conditions

Size

Total Lines	223
Code Lines	128

Duplication

Lines	0
Ratio	0 %

Code Coverage

Tests	110
CRAP Score	51

Importance

Changes

Metric	Value
cc	51
eloc	128
nop	5
dl	0
loc	223
ccs	110
cts	110
cp	1
crap	51
rs	0
c	0
b	0
f	0

How to fix Long Method Complexity

# -*- coding: utf-8 -*-

# Copyright 2018 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.fingerprint._synoname.

Synoname toolcode
"""

from __future__ import (
    absolute_import,
    division,
    print_function,
    unicode_literals,
)

from ._fingerprint import _Fingerprint

__all__ = ['SynonameToolcode', 'synoname_toolcode']


class SynonameToolcode(_Fingerprint):
    """Synoname Toolcode.

    Builds the fixed-layout "toolcode" string that summarizes a personal
    name: qualifier, punctuation, generation markers, Roman numerals, name
    lengths, the special words found in the name, and the initial letters
    of its words.

    Cf. :cite:`Getty:1991,Gross:1991`.
    """

    # Each row is (is_roman_numeral, match, extra, method).  ``method`` is a
    # bit mask over the values in ``_method_dict`` that selects where in the
    # full name ``match`` is searched for.  The row's index in this table is
    # the three-digit code written into the toolcode, so the order of the
    # rows must not change.
    _synoname_special_table = (
        # Roman, match, extra, method
        (False, 'NONE', '', 0),
        (False, 'aine', '', 3),
        (False, 'also erroneously', '', 4),
        (False, 'also identified with the', '', 2),
        (False, 'also identified with', '', 2),
        (False, 'archbishop', '', 7),
        (False, 'atelier', '', 7),
        (False, 'baron', '', 7),
        (False, 'cadet', '', 3),
        (False, 'cardinal', '', 7),
        (False, 'circle of', '', 5),
        (False, 'circle', '', 5),
        (False, 'class of', '', 5),
        (False, 'conde de', '', 7),
        (False, 'countess', '', 7),
        (False, 'count', '', 7),
        (False, "d'", " d'", 15),
        (False, 'dai', '', 15),
        (False, "dall'", " dall'", 15),
        (False, 'dalla', '', 15),
        (False, 'dalle', '', 15),
        (False, 'dal', '', 15),
        (False, 'da', '', 15),
        (False, 'degli', '', 15),
        (False, 'della', '', 15),
        (False, 'del', '', 15),
        (False, 'den', '', 15),
        (False, 'der altere', '', 3),
        (False, 'der jungere', '', 3),
        (False, 'der', '', 15),
        (False, 'de la', '', 15),
        (False, 'des', '', 15),
        (False, "de'", " de'", 15),
        (False, 'de', '', 15),
        (False, 'di ser', '', 7),
        (False, 'di', '', 15),
        (False, 'dos', '', 15),
        (False, 'du', '', 15),
        (False, 'duke of', '', 7),
        (False, 'earl of', '', 7),
        (False, 'el', '', 15),
        (False, 'fils', '', 3),
        (False, 'florentine follower of', '', 5),
        (False, 'follower of', '', 5),
        (False, 'fra', '', 7),
        (False, 'freiherr von', '', 7),
        (False, 'giovane', '', 7),
        (False, 'group', '', 5),
        (True, 'iii', '', 3),
        (True, 'ii', '', 3),
        (False, 'il giovane', '', 7),
        (False, 'il vecchio', '', 7),
        (False, 'il', '', 15),
        (False, "in't", '', 7),
        (False, 'in het', '', 7),
        (True, 'iv', '', 3),
        (True, 'ix', '', 3),
        (True, 'i', '', 3),
        (False, 'jr.', '', 3),
        (False, 'jr', '', 3),
        (False, 'juniore', '', 3),
        (False, 'junior', '', 3),
        (False, 'king of', '', 7),
        (False, "l'", " l'", 15),
        (False, "l'aine", '', 3),
        (False, 'la', '', 15),
        (False, 'le jeune', '', 3),
        (False, 'le', '', 15),
        (False, 'lo', '', 15),
        (False, 'maestro', '', 7),
        (False, 'maitre', '', 7),
        (False, 'marchioness', '', 7),
        (False, 'markgrafin von', '', 7),
        (False, 'marquess', '', 7),
        (False, 'marquis', '', 7),
        (False, 'master of the', '', 7),
        (False, 'master of', '', 7),
        (False, 'master known as the', '', 7),
        (False, 'master with the', '', 7),
        (False, 'master with', '', 7),
        (False, 'masters', '', 7),
        (False, 'master', '', 7),
        (False, 'meister', '', 7),
        (False, 'met de', '', 7),
        (False, 'met', '', 7),
        (False, 'mlle.', '', 7),
        (False, 'mlle', '', 7),
        (False, 'monogrammist', '', 7),
        (False, 'monsu', '', 7),
        (False, 'nee', '', 2),
        (False, 'of', '', 3),
        (False, 'oncle', '', 3),
        (False, 'op den', '', 15),
        (False, 'op de', '', 15),
        (False, 'or', '', 2),
        (False, 'over den', '', 15),
        (False, 'over de', '', 15),
        (False, 'over', '', 7),
        (False, 'p.re', '', 7),
        (False, 'p.r.a.', '', 1),
        (False, 'padre', '', 7),
        (False, 'painter', '', 7),
        (False, 'pere', '', 3),
        (False, 'possibly identified with', '', 6),
        (False, 'possibly', '', 6),
        (False, 'pseudo', '', 15),
        (False, 'r.a.', '', 1),
        (False, 'reichsgraf von', '', 7),
        (False, 'ritter von', '', 7),
        (False, 'sainte-', ' sainte-', 8),
        (False, 'sainte', '', 7),
        (False, 'saint-', ' saint-', 8),
        (False, 'saint', '', 7),
        (False, 'santa', '', 15),
        (False, "sant'", " sant'", 15),
        (False, 'san', '', 15),
        (False, 'ser', '', 7),
        (False, 'seniore', '', 3),
        (False, 'senior', '', 3),
        (False, 'sir', '', 5),
        (False, 'sr.', '', 3),
        (False, 'sr', '', 3),
        (False, 'ss.', ' ss.', 14),
        (False, 'ss', '', 6),
        (False, 'st-', ' st-', 8),
        (False, 'st.', ' st.', 15),
        (False, 'ste-', ' ste-', 8),
        (False, 'ste.', ' ste.', 15),
        (False, 'studio', '', 7),
        (False, 'sub-group', '', 5),
        (False, 'sultan of', '', 7),
        (False, 'ten', '', 15),
        (False, 'ter', '', 15),
        (False, 'the elder', '', 3),
        (False, 'the younger', '', 3),
        (False, 'the', '', 7),
        (False, 'tot', '', 15),
        (False, 'unidentified', '', 1),
        (False, 'van den', '', 15),
        (False, 'van der', '', 15),
        (False, 'van de', '', 15),
        (False, 'vanden', '', 15),
        (False, 'vander', '', 15),
        (False, 'van', '', 15),
        (False, 'vecchia', '', 7),
        (False, 'vecchio', '', 7),
        (True, 'viii', '', 3),
        (True, 'vii', '', 3),
        (True, 'vi', '', 3),
        (True, 'v', '', 3),
        (False, 'vom', '', 7),
        (False, 'von', '', 15),
        (False, 'workshop', '', 7),
        (True, 'xiii', '', 3),
        (True, 'xii', '', 3),
        (True, 'xiv', '', 3),
        (True, 'xix', '', 3),
        (True, 'xi', '', 3),
        (True, 'xviii', '', 3),
        (True, 'xvii', '', 3),
        (True, 'xvi', '', 3),
        (True, 'xv', '', 3),
        (True, 'xx', '', 3),
        (True, 'x', '', 3),
        (False, 'y', '', 7),
    )

    # Bit flags used in the ``method`` column of the special table.
    _method_dict = {
        'end': 1,
        'middle': 2,
        'beginning': 4,
        'beginning_no_space': 8,
    }

    # Fill field 0 (qualifier)
    _qual_3 = {
        'adaptation after',
        'after',
        'assistant of',
        'assistants of',
        'circle of',
        'follower of',
        'imitator of',
        'in the style of',
        'manner of',
        'pupil of',
        'school of',
        'studio of',
        'style of',
        'workshop of',
    }
    _qual_2 = {'copy after', 'copy after?', 'copy of'}
    _qual_1 = {
        'ascribed to',
        'attributed to or copy after',
        'attributed to',
        'possibly',
    }

    # Fill field 2 (generation)
    _gen_1 = (
        'the elder',
        ' sr.',
        ' sr',
        'senior',
        'der altere',
        'il vecchio',
        "l'aine",
        'p.re',
        'padre',
        'seniore',
        'vecchia',
        'vecchio',
    )
    _gen_2 = (
        ' jr.',
        ' jr',
        'der jungere',
        'il giovane',
        'giovane',
        'juniore',
        'junior',
        'le jeune',
        'the younger',
    )

    # Abbreviation fragments in the first name that block a Roman-numeral
    # match from being recorded.
    _roman_abbrs = ('i.', 'v.', 'x.')

    @staticmethod
    def _roman_check(numeral, fname, lname):
        """Move a Roman numeral from the first name to the last name.

        The numeral is moved only when its first occurrence in ``fname`` is
        followed by the end of the string, a space, or a comma.  If the
        numeral does not occur in ``fname`` at all, nothing is moved.

        Parameters
        ----------
        numeral : str
            Roman numeral
        fname : str
            First name
        lname : str
            Last name

        Returns
        -------
        tuple
            First and last names (both stripped), with the Roman numeral
            moved when applicable

        """
        loc = fname.find(numeral)
        # The ``loc != -1`` guard must cover both the "at end" and the
        # "followed by a separator" cases; otherwise a missing numeral
        # would index fname relative to position -1.
        if loc != -1:
            end = loc + len(numeral)
            if end == len(fname) or fname[end] in {' ', ','}:
                lname = ' '.join((lname, numeral))
                fname = ' '.join(
                    (fname[:loc].strip(), fname[end:].lstrip(' ,'))
                )
        return fname.strip(), lname.strip()

    def fingerprint(self, lname, fname='', qual='', normalize=0):
        """Build the Synoname toolcode.

        Parameters
        ----------
        lname : str
            Last name
        fname : str
            First name (can be blank)
        qual : str
            Qualifier
        normalize : int
            Normalization mode (0, 1, or 2).  Any non-zero value moves the
            text after the first comma of ``lname`` to the front of
            ``fname``; mode 2 additionally moves elder/younger markers and
            Roman numerals from ``fname`` to ``lname``.

        Returns
        -------
        tuple
            The transformed names and the synoname toolcode, as
            ``(lname, fname, toolcode)``

        Examples
        --------
        >>> st = SynonameToolcode()
        >>> st.fingerprint('hat')
        ('hat', '', '0000000003$$h')
        >>> st.fingerprint('niall')
        ('niall', '', '0000000005$$n')
        >>> st.fingerprint('colin')
        ('colin', '', '0000000005$$c')
        >>> st.fingerprint('atcg')
        ('atcg', '', '0000000004$$a')
        >>> st.fingerprint('entreatment')
        ('entreatment', '', '0000000011$$e')

        >>> st.fingerprint('Ste.-Marie', 'Count John II', normalize=2)
        ('ste.-marie ii', 'count john', '0200491310$015b049a127c$smcji')
        >>> st.fingerprint('Michelangelo IV', '', 'Workshop of')
        ('michelangelo iv', '', '3000550015$055b$mi')

        """
        lname = lname.lower()
        fname = fname.lower()
        qual = qual.lower()

        # Start with the basic code.  Field layout:
        #   0 qualifier, 1 punctuation, 2 generation, 3 Roman numeral,
        #   4 len(fname), 5 len(lname), 6 '$', 7 specials, 8 '$',
        #   9 search range
        toolcode = ['0', '0', '0', '000', '00', '00', '$', '', '$', '']

        full_name = ' '.join((lname, fname))

        # Fill field 0 (qualifier)
        if qual in self._qual_3:
            toolcode[0] = '3'
        elif qual in self._qual_2:
            toolcode[0] = '2'
        elif qual in self._qual_1:
            toolcode[0] = '1'

        # Fill field 1 (punctuation): a period outranks all other marks
        if '.' in full_name:
            toolcode[1] = '2'
        elif any(
            punct in full_name for punct in ',-/:;"&\'()!{|}?$%*+<=>[\\]^_`~'
        ):
            toolcode[1] = '1'

        # Fill field 2 (generation); elder markers take priority
        elderyounger = ''  # save elder/younger for possible movement later
        for gen in self._gen_1:
            if gen in full_name:
                toolcode[2] = '1'
                elderyounger = gen
                break
        else:
            for gen in self._gen_2:
                if gen in full_name:
                    toolcode[2] = '2'
                    elderyounger = gen
                    break

        # do comma flip
        if normalize:
            comma = lname.find(',')
            if comma != -1:
                # lstrip tolerates a last name that ends in a comma, where
                # nothing remains to be moved into the first name.
                lname_end = lname[comma + 1 :].lstrip(' ,')
                if lname_end:
                    fname = lname_end + ' ' + fname
                lname = lname[:comma].strip()

        # do elder/younger move
        if normalize == 2 and elderyounger:
            elderyounger_loc = fname.find(elderyounger)
            if elderyounger_loc != -1:
                lname = ' '.join((lname, elderyounger.strip()))
                fname = ' '.join(
                    (
                        fname[:elderyounger_loc].strip(),
                        fname[elderyounger_loc + len(elderyounger) :],
                    )
                ).strip()

        # Fill fields 4 and 5 (name lengths after normalization)
        toolcode[4] = '{:02d}'.format(len(fname))
        toolcode[5] = '{:02d}'.format(len(lname))

        # strip punctuation
        for char in ',/:;"&()!{|}?$%*+<=>[\\]^_`~':
            full_name = full_name.replace(char, '')
        # Hyphens become spaces, except the one inside the sequence 'b-g'.
        # Each replacement is one character for one character, so positions
        # taken from the pre-loop string remain valid.
        for pos, char in enumerate(full_name):
            if char == '-' and full_name[pos - 1 : pos + 2] != 'b-g':
                full_name = full_name[:pos] + ' ' + full_name[pos + 1 :]

        # Fill field 9 (search range): distinct word initials
        for word in full_name.split():
            letter = word[0]
            if letter not in toolcode[9]:
                toolcode[9] += letter
            if len(toolcode[9]) == 15:
                break

        # Fill fields 7 (specials) and 3 (roman numerals)
        for num, special in enumerate(self._synoname_special_table):
            roman, match, extra, method = special
            code = '{:03d}'.format(num)

            if method & self._method_dict['end']:
                match_context = ' ' + match
                loc = full_name.find(match_context)
                if (len(full_name) > len(match_context)) and (
                    loc == len(full_name) - len(match_context)
                ):
                    # Roman numerals are skipped when the first name holds
                    # an abbreviation such as 'i.', 'v.', or 'x.'
                    if not roman or not any(
                        abbr in fname for abbr in self._roman_abbrs
                    ):
                        full_name = full_name[:loc]
                        toolcode[7] += code + 'a'
                        if roman:
                            if toolcode[3] == '000':
                                toolcode[3] = code
                            if normalize == 2:
                                fname, lname = self._roman_check(
                                    match, fname, lname
                                )

            if method & self._method_dict['middle']:
                match_context = ' ' + match + ' '
                loc = 0
                while loc != -1:
                    loc = full_name.find(match_context, loc + 1)
                    if loc > 0:
                        if not roman or not any(
                            abbr in fname for abbr in self._roman_abbrs
                        ):
                            # Drop ' match' but keep the trailing space
                            full_name = (
                                full_name[:loc]
                                + full_name[loc + len(match) + 1 :]
                            )
                            toolcode[7] += code + 'b'
                            if roman:
                                if toolcode[3] == '000':
                                    toolcode[3] = code
                                if normalize == 2:
                                    fname, lname = self._roman_check(
                                        match, fname, lname
                                    )

            if method & self._method_dict['beginning']:
                match_context = match + ' '
                loc = full_name.find(match_context)
                if loc == 0:
                    full_name = full_name[len(match) + 1 :]
                    toolcode[7] += code + 'c'

            if method & self._method_dict['beginning_no_space']:
                loc = full_name.find(match)
                if loc == 0:
                    toolcode[7] += code + 'd'
                    if full_name[: len(match)] not in toolcode[9]:
                        toolcode[9] += full_name[: len(match)]

            if extra:
                loc = full_name.find(extra)
                if loc != -1:
                    toolcode[7] += code + 'X'
                    # Since extras are unique, we only look for each of them
                    # once, and they include otherwise impossible characters
                    # for this field, so no membership test is needed before
                    # appending.
                    # NOTE(review): the slice length is len(match), not
                    # len(extra), so it captures the leading space of
                    # ``extra`` and omits the last character of ``match``.
                    # Kept as-is because emitted toolcodes depend on it;
                    # confirm against the reference implementation.
                    toolcode[9] += full_name[loc : loc + len(match)]

        return lname, fname, ''.join(toolcode)


def synoname_toolcode(lname, fname='', qual='', normalize=0):
    """Build the Synoname toolcode.

    Functional convenience wrapper: creates a :py:class:`SynonameToolcode`
    and delegates to :py:meth:`SynonameToolcode.fingerprint`.

    Parameters
    ----------
    lname : str
        Last name
    fname : str
        First name (can be blank)
    qual : str
        Qualifier
    normalize : int
        Normalization mode (0, 1, or 2)

    Returns
    -------
    tuple
        The transformed names and the synoname toolcode

    Examples
    --------
    >>> synoname_toolcode('hat')
    ('hat', '', '0000000003$$h')
    >>> synoname_toolcode('niall')
    ('niall', '', '0000000005$$n')
    >>> synoname_toolcode('colin')
    ('colin', '', '0000000005$$c')
    >>> synoname_toolcode('atcg')
    ('atcg', '', '0000000004$$a')
    >>> synoname_toolcode('entreatment')
    ('entreatment', '', '0000000011$$e')

    >>> synoname_toolcode('Ste.-Marie', 'Count John II', normalize=2)
    ('ste.-marie ii', 'count john', '0200491310$015b049a127c$smcji')
    >>> synoname_toolcode('Michelangelo IV', '', 'Workshop of')
    ('michelangelo iv', '', '3000550015$055b$mi')

    """
    toolcoder = SynonameToolcode()
    return toolcoder.fingerprint(lname, fname, qual, normalize)


if __name__ == '__main__':
    # Executed as a script: run every doctest embedded in this module.
    from doctest import testmod

    testmod()


1		# -*- coding: utf-8 -*-
2
3		# Copyright 2018 by Christopher C. Little.
4		# This file is part of Abydos.
5		#
6		# Abydos is free software: you can redistribute it and/or modify
7		# it under the terms of the GNU General Public License as published by
8		# the Free Software Foundation, either version 3 of the License, or
9		# (at your option) any later version.
10		#
11		# Abydos is distributed in the hope that it will be useful,
12		# but WITHOUT ANY WARRANTY; without even the implied warranty of
13		# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14		# GNU General Public License for more details.
15		#
16		# You should have received a copy of the GNU General Public License
17		# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19	1	"""abydos.fingerprint._synoname.
20
21		Synoname toolcode
22		"""
23
24	1	from __future__ import (
25		absolute_import,
26		division,
27		print_function,
28		unicode_literals,
29		)
30
31	1	from ._fingerprint import _Fingerprint
32
33	1	__all__ = ['SynonameToolcode', 'synoname_toolcode']
34
35
36	1	class SynonameToolcode(_Fingerprint):
37		"""Synoname Toolcode.
38
39		Cf. :cite:`Getty:1991,Gross:1991`.
40		"""
41
42	1	_synoname_special_table = (
43		# Roman, match, extra, method
44		(False, 'NONE', '', 0),
45		(False, 'aine', '', 3),
46		(False, 'also erroneously', '', 4),
47		(False, 'also identified with the', '', 2),
48		(False, 'also identified with', '', 2),
49		(False, 'archbishop', '', 7),
50		(False, 'atelier', '', 7),
51		(False, 'baron', '', 7),
52		(False, 'cadet', '', 3),
53		(False, 'cardinal', '', 7),
54		(False, 'circle of', '', 5),
55		(False, 'circle', '', 5),
56		(False, 'class of', '', 5),
57		(False, 'conde de', '', 7),
58		(False, 'countess', '', 7),
59		(False, 'count', '', 7),
60		(False, "d'", " d'", 15),
61		(False, 'dai', '', 15),
62		(False, "dall'", " dall'", 15),
63		(False, 'dalla', '', 15),
64		(False, 'dalle', '', 15),
65		(False, 'dal', '', 15),
66		(False, 'da', '', 15),
67		(False, 'degli', '', 15),
68		(False, 'della', '', 15),
69		(False, 'del', '', 15),
70		(False, 'den', '', 15),
71		(False, 'der altere', '', 3),
72		(False, 'der jungere', '', 3),
73		(False, 'der', '', 15),
74		(False, 'de la', '', 15),
75		(False, 'des', '', 15),
76		(False, "de'", " de'", 15),
77		(False, 'de', '', 15),
78		(False, 'di ser', '', 7),
79		(False, 'di', '', 15),
80		(False, 'dos', '', 15),
81		(False, 'du', '', 15),
82		(False, 'duke of', '', 7),
83		(False, 'earl of', '', 7),
84		(False, 'el', '', 15),
85		(False, 'fils', '', 3),
86		(False, 'florentine follower of', '', 5),
87		(False, 'follower of', '', 5),
88		(False, 'fra', '', 7),
89		(False, 'freiherr von', '', 7),
90		(False, 'giovane', '', 7),
91		(False, 'group', '', 5),
92		(True, 'iii', '', 3),
93		(True, 'ii', '', 3),
94		(False, 'il giovane', '', 7),
95		(False, 'il vecchio', '', 7),
96		(False, 'il', '', 15),
97		(False, "in't", '', 7),
98		(False, 'in het', '', 7),
99		(True, 'iv', '', 3),
100		(True, 'ix', '', 3),
101		(True, 'i', '', 3),
102		(False, 'jr.', '', 3),
103		(False, 'jr', '', 3),
104		(False, 'juniore', '', 3),
105		(False, 'junior', '', 3),
106		(False, 'king of', '', 7),
107		(False, "l'", " l'", 15),
108		(False, "l'aine", '', 3),
109		(False, 'la', '', 15),
110		(False, 'le jeune', '', 3),
111		(False, 'le', '', 15),
112		(False, 'lo', '', 15),
113		(False, 'maestro', '', 7),
114		(False, 'maitre', '', 7),
115		(False, 'marchioness', '', 7),
116		(False, 'markgrafin von', '', 7),
117		(False, 'marquess', '', 7),
118		(False, 'marquis', '', 7),
119		(False, 'master of the', '', 7),
120		(False, 'master of', '', 7),
121		(False, 'master known as the', '', 7),
122		(False, 'master with the', '', 7),
123		(False, 'master with', '', 7),
124		(False, 'masters', '', 7),
125		(False, 'master', '', 7),
126		(False, 'meister', '', 7),
127		(False, 'met de', '', 7),
128		(False, 'met', '', 7),
129		(False, 'mlle.', '', 7),
130		(False, 'mlle', '', 7),
131		(False, 'monogrammist', '', 7),
132		(False, 'monsu', '', 7),
133		(False, 'nee', '', 2),
134		(False, 'of', '', 3),
135		(False, 'oncle', '', 3),
136		(False, 'op den', '', 15),
137		(False, 'op de', '', 15),
138		(False, 'or', '', 2),
139		(False, 'over den', '', 15),
140		(False, 'over de', '', 15),
141		(False, 'over', '', 7),
142		(False, 'p.re', '', 7),
143		(False, 'p.r.a.', '', 1),
144		(False, 'padre', '', 7),
145		(False, 'painter', '', 7),
146		(False, 'pere', '', 3),
147		(False, 'possibly identified with', '', 6),
148		(False, 'possibly', '', 6),
149		(False, 'pseudo', '', 15),
150		(False, 'r.a.', '', 1),
151		(False, 'reichsgraf von', '', 7),
152		(False, 'ritter von', '', 7),
153		(False, 'sainte-', ' sainte-', 8),
154		(False, 'sainte', '', 7),
155		(False, 'saint-', ' saint-', 8),
156		(False, 'saint', '', 7),
157		(False, 'santa', '', 15),
158		(False, "sant'", " sant'", 15),
159		(False, 'san', '', 15),
160		(False, 'ser', '', 7),
161		(False, 'seniore', '', 3),
162		(False, 'senior', '', 3),
163		(False, 'sir', '', 5),
164		(False, 'sr.', '', 3),
165		(False, 'sr', '', 3),
166		(False, 'ss.', ' ss.', 14),
167		(False, 'ss', '', 6),
168		(False, 'st-', ' st-', 8),
169		(False, 'st.', ' st.', 15),
170		(False, 'ste-', ' ste-', 8),
171		(False, 'ste.', ' ste.', 15),
172		(False, 'studio', '', 7),
173		(False, 'sub-group', '', 5),
174		(False, 'sultan of', '', 7),
175		(False, 'ten', '', 15),
176		(False, 'ter', '', 15),
177		(False, 'the elder', '', 3),
178		(False, 'the younger', '', 3),
179		(False, 'the', '', 7),
180		(False, 'tot', '', 15),
181		(False, 'unidentified', '', 1),
182		(False, 'van den', '', 15),
183		(False, 'van der', '', 15),
184		(False, 'van de', '', 15),
185		(False, 'vanden', '', 15),
186		(False, 'vander', '', 15),
187		(False, 'van', '', 15),
188		(False, 'vecchia', '', 7),
189		(False, 'vecchio', '', 7),
190		(True, 'viii', '', 3),
191		(True, 'vii', '', 3),
192		(True, 'vi', '', 3),
193		(True, 'v', '', 3),
194		(False, 'vom', '', 7),
195		(False, 'von', '', 15),
196		(False, 'workshop', '', 7),
197		(True, 'xiii', '', 3),
198		(True, 'xii', '', 3),
199		(True, 'xiv', '', 3),
200		(True, 'xix', '', 3),
201		(True, 'xi', '', 3),
202		(True, 'xviii', '', 3),
203		(True, 'xvii', '', 3),
204		(True, 'xvi', '', 3),
205		(True, 'xv', '', 3),
206		(True, 'xx', '', 3),
207		(True, 'x', '', 3),
208		(False, 'y', '', 7),
209		)
210
211	1	_method_dict = {
212		'end': 1,
213		'middle': 2,
214		'beginning': 4,
215		'beginning_no_space': 8,
216		}
217
218		# Fill field 0 (qualifier)
219	1	_qual_3 = {
220		'adaptation after',
221		'after',
222		'assistant of',
223		'assistants of',
224		'circle of',
225		'follower of',
226		'imitator of',
227		'in the style of',
228		'manner of',
229		'pupil of',
230		'school of',
231		'studio of',
232		'style of',
233		'workshop of',
234		}
235	1	_qual_2 = {'copy after', 'copy after?', 'copy of'}
236	1	_qual_1 = {
237		'ascribed to',
238		'attributed to or copy after',
239		'attributed to',
240		'possibly',
241		}
242
243		# Fill field 2 (generation)
244	1	_gen_1 = (
245		'the elder',
246		' sr.',
247		' sr',
248		'senior',
249		'der altere',
250		'il vecchio',
251		"l'aine",
252		'p.re',
253		'padre',
254		'seniore',
255		'vecchia',
256		'vecchio',
257		)
258	1	_gen_2 = (
259		' jr.',
260		' jr',
261		'der jungere',
262		'il giovane',
263		'giovane',
264		'juniore',
265		'junior',
266		'le jeune',
267		'the younger',
268		)
269
270	1	def fingerprint(self, lname, fname='', qual='', normalize=0):
271		"""Build the Synoname toolcode.
272
273		Parameters
274		----------
275		lname : str
276		Last name
277		fname : str
278		First name (can be blank)
279		qual : str
280		Qualifier
281		normalize : int
282		Normalization mode (0, 1, or 2)
283
284		Returns
285		-------
286		tuple
287		The transformed names and the synoname toolcode
288
289		Examples
290		--------
291		>>> st = SynonameToolcode()
292		>>> st.fingerprint('hat')
293		('hat', '', '0000000003$$h')
294		>>> st.fingerprint('niall')
295		('niall', '', '0000000005$$n')
296		>>> st.fingerprint('colin')
297		('colin', '', '0000000005$$c')
298		>>> st.fingerprint('atcg')
299		('atcg', '', '0000000004$$a')
300		>>> st.fingerprint('entreatment')
301		('entreatment', '', '0000000011$$e')
302
303		>>> st.fingerprint('Ste.-Marie', 'Count John II', normalize=2)
304		('ste.-marie ii', 'count john', '0200491310$015b049a127c$smcji')
305		>>> st.fingerprint('Michelangelo IV', '', 'Workshop of')
306		('michelangelo iv', '', '3000550015$055b$mi')
307
308		"""
309	1	lname = lname.lower()
310	1	fname = fname.lower()
311	1	qual = qual.lower()
312
313		# Start with the basic code
314	1	toolcode = ['0', '0', '0', '000', '00', '00', '$', '', '$', '']
315
316	1	full_name = ' '.join((lname, fname))
317
318	1	if qual in self._qual_3:
319	1	toolcode[0] = '3'
320	1	elif qual in self._qual_2:
321	1	toolcode[0] = '2'
322	1	elif qual in self._qual_1:
323	1	toolcode[0] = '1'
324
325		# Fill field 1 (punctuation)
326	1	if '.' in full_name:
327	1	toolcode[1] = '2'
328		else:
329	1	for punct in ',-/:;"&\'()!{|}?$%*+<=>[\\]^_`~':
330	1	if punct in full_name:
331	1	toolcode[1] = '1'
332	1	break
333
334	1	elderyounger = '' # save elder/younger for possible movement later
335	1	for gen in self._gen_1:
336	1	if gen in full_name:
337	1	toolcode[2] = '1'
338	1	elderyounger = gen
339	1	break
340		else:
341	1	for gen in self._gen_2:
342	1	if gen in full_name:
343	1	toolcode[2] = '2'
344	1	elderyounger = gen
345	1	break
346
347		# do comma flip
348	1	if normalize:
349	1	comma = lname.find(',')
350	1	if comma != -1:
351	1	lname_end = lname[comma + 1 :]
352	1	while lname_end[0] in {' ', ','}:
353	1	lname_end = lname_end[1:]
354	1	fname = lname_end + ' ' + fname
355	1	lname = lname[:comma].strip()
356
357		# do elder/younger move
358	1	if normalize == 2 and elderyounger:
359	1	elderyounger_loc = fname.find(elderyounger)
360	1	if elderyounger_loc != -1:
361	1	lname = ' '.join((lname, elderyounger.strip()))
362	1	fname = ' '.join(
363		(
364		fname[:elderyounger_loc].strip(),
365		fname[elderyounger_loc + len(elderyounger) :],
366		)
367		).strip()
368
369	1	toolcode[4] = '{:02d}'.format(len(fname))
370	1	toolcode[5] = '{:02d}'.format(len(lname))
371
372		# strip punctuation
373	1	for char in ',/:;"&()!{|}?$%*+<=>[\\]^_`~':
374	1	full_name = full_name.replace(char, '')
375	1	for pos, char in enumerate(full_name):
376	1	if char == '-' and full_name[pos - 1 : pos + 2] != 'b-g':
377	1	full_name = full_name[:pos] + ' ' + full_name[pos + 1 :]
378
379		# Fill field 9 (search range)
380	1	for letter in [_[0] for _ in full_name.split()]:
381	1	if letter not in toolcode[9]:
382	1	toolcode[9] += letter
383	1	if len(toolcode[9]) == 15:
384	1	break
385
386	1	def roman_check(numeral, fname, lname):
387		"""Move Roman numerals from first name to last.
388
389		Parameters
390		----------
391		numeral : str
392		Roman numeral
393		fname : str
394		First name
395		lname : str
396		Last name
397
398		Returns
399		-------
400		tuple
401		First and last names with Roman numeral moved
402
403		"""
404	1	loc = fname.find(numeral)
405	1	if fname and (
406		loc != -1
407		and (len(fname[loc:]) == len(numeral))
408		or fname[loc + len(numeral)] in {' ', ','}
409		):
410	1	lname = ' '.join((lname, numeral))
411	1	fname = ' '.join(
412		(
413		fname[:loc].strip(),
414		fname[loc + len(numeral) :].lstrip(' ,'),
415		)
416		)
417	1	return fname.strip(), lname.strip()
418
419		# Fill fields 7 (specials) and 3 (roman numerals)
420	1	for num, special in enumerate(self._synoname_special_table):
421	1	roman, match, extra, method = special
422	1	if method & self._method_dict['end']:
423	1	match_context = ' ' + match
424	1	loc = full_name.find(match_context)
425	1	if (len(full_name) > len(match_context)) and (
426		loc == len(full_name) - len(match_context)
427		):
428	1	if roman:
429	1	if not any(
430		abbr in fname for abbr in ('i.', 'v.', 'x.')
431		):
432	1	full_name = full_name[:loc]
433	1	toolcode[7] += '{:03d}'.format(num) + 'a'
434	1	if toolcode[3] == '000':
435	1	toolcode[3] = '{:03d}'.format(num)
436	1	if normalize == 2:
437	1	fname, lname = roman_check(match, fname, lname)
438		else:
439	1	full_name = full_name[:loc]
440	1	toolcode[7] += '{:03d}'.format(num) + 'a'
441	1	if method & self._method_dict['middle']:
442	1	match_context = ' ' + match + ' '
443	1	loc = 0
444	1	while loc != -1:
445	1	loc = full_name.find(match_context, loc + 1)
446	1	if loc > 0:
447	1	if roman:
448	1	if not any(
449		abbr in fname for abbr in ('i.', 'v.', 'x.')
450		):
451	1	full_name = (
452		full_name[:loc]
453		+ full_name[loc + len(match) + 1 :]
454		)
455	1	toolcode[7] += '{:03d}'.format(num) + 'b'
456	1	if toolcode[3] == '000':
457	1	toolcode[3] = '{:03d}'.format(num)
458	1	if normalize == 2:
459	1	fname, lname = roman_check(
460		match, fname, lname
461		)
462		else:
463	1	full_name = (
464		full_name[:loc]
465		+ full_name[loc + len(match) + 1 :]
466		)
467	1	toolcode[7] += '{:03d}'.format(num) + 'b'
468	1	if method & self._method_dict['beginning']:
469	1	match_context = match + ' '
470	1	loc = full_name.find(match_context)
471	1	if loc == 0:
472	1	full_name = full_name[len(match) + 1 :]
473	1	toolcode[7] += '{:03d}'.format(num) + 'c'
474	1	if method & self._method_dict['beginning_no_space']:
475	1	loc = full_name.find(match)
476	1	if loc == 0:
477	1	toolcode[7] += '{:03d}'.format(num) + 'd'
478	1	if full_name[: len(match)] not in toolcode[9]:
479	1	toolcode[9] += full_name[: len(match)]
480
481	1	if extra:
482	1	loc = full_name.find(extra)
483	1	if loc != -1:
484	1	toolcode[7] += '{:03d}'.format(num) + 'X'
485		# Since extras are unique, we only look for each of them
486		# once, and they include otherwise impossible characters
487		# for this field, it's not possible for the following line
488		# to have ever been false.
489		# if full_name[loc:loc+len(extra)] not in toolcode[9]:
490	1	toolcode[9] += full_name[loc : loc + len(match)]
491
492	1	return lname, fname, ''.join(toolcode)
493
494
495	1	def synoname_toolcode(lname, fname='', qual='', normalize=0):
496		"""Build the Synoname toolcode.
497
498		This is a wrapper for :py:meth:`SynonameToolcode.fingerprint`.
499
500		Parameters
501		----------
502		lname : str
503		Last name
504		fname : str
505		First name (can be blank)
506		qual : str
507		Qualifier
508		normalize : int
509		Normalization mode (0, 1, or 2)
510
511		Returns
512		-------
513		tuple
514		The transformed names and the synoname toolcode
515
516		Examples
517		--------
518		>>> synoname_toolcode('hat')
519		('hat', '', '0000000003$$h')
520		>>> synoname_toolcode('niall')
521		('niall', '', '0000000005$$n')
522		>>> synoname_toolcode('colin')
523		('colin', '', '0000000005$$c')
524		>>> synoname_toolcode('atcg')
525		('atcg', '', '0000000004$$a')
526		>>> synoname_toolcode('entreatment')
527		('entreatment', '', '0000000011$$e')
528
529		>>> synoname_toolcode('Ste.-Marie', 'Count John II', normalize=2)
530		('ste.-marie ii', 'count john', '0200491310$015b049a127c$smcji')
531		>>> synoname_toolcode('Michelangelo IV', '', 'Workshop of')
532		('michelangelo iv', '', '3000550015$055b$mi')
533
534		"""
535	1	return SynonameToolcode().fingerprint(lname, fname, qual, normalize)
536
537
538		if __name__ == '__main__':
539		import doctest
540
541		doctest.testmod()
542

chrislit / abydos

Push — master ( 3ac297...afe14d )

SynonameToolcode.fingerprint() F

Complexity

Size

Duplication

Code Coverage

Importance

How to fix Long Method Complexity

Long Method

Complexity

Duplication Side-by-Side

Filter issues like