Test Failed
Push — master ( 23810f...afe14d )
by Chris
09:47
created

abydos.fingerprint._synoname   B

Complexity

Total Complexity 52

Size/Duplication

Total Lines 542
Duplicated Lines 0 %

Test Coverage

Coverage 100%

Importance

Changes 0
Metric Value
eloc 358
dl 0
loc 542
ccs 124
cts 124
cp 1
rs 7.44
c 0
b 0
f 0
wmc 52

1 Function

Rating   Name   Duplication   Size   Complexity  
A synoname_toolcode() 0 41 1

1 Method

Rating   Name   Duplication   Size   Complexity  
F SynonameToolcode.fingerprint() 0 223 51

How to fix   Complexity   

Complexity

Complex classes like abydos.fingerprint._synoname often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
# -*- coding: utf-8 -*-
2
3
# Copyright 2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19 1
"""abydos.fingerprint._synoname.
20
21
Synoname toolcode
22
"""
23
24 1
from __future__ import (
25
    absolute_import,
26
    division,
27
    print_function,
28
    unicode_literals,
29
)
30
31 1
from ._fingerprint import _Fingerprint
32
33 1
__all__ = ['SynonameToolcode', 'synoname_toolcode']
34
35
36 1
class SynonameToolcode(_Fingerprint):
    """Synoname Toolcode.

    Cf. :cite:`Getty:1991,Gross:1991`.

    The toolcode is assembled from ten fields, concatenated in order:

    ===== ==================================================================
    Field Content
    ===== ==================================================================
    0     qualifier class (``'0'``-``'3'``)
    1     punctuation flag (``'2'`` period, ``'1'`` other, ``'0'`` none)
    2     generation flag (``'1'`` elder, ``'2'`` younger, ``'0'`` neither)
    3     table index of the first Roman numeral matched (3 digits)
    4     length of the first name (2 digits)
    5     length of the last name (2 digits)
    6     literal ``'$'``
    7     matched specials: 3-digit table index + position letter
    8     literal ``'$'``
    9     search range (distinct initial letters of the name's words)
    ===== ==================================================================
    """

    # Each row is (is_roman_numeral, match_text, extra_text, method), where
    # ``method`` is a bit mask over the values of ``_method_dict``.
    # The position of a row in this table is emitted in the toolcode, so the
    # order of the rows must not change.
    _synoname_special_table = (
        # Roman, match, extra, method
        (False, 'NONE', '', 0),
        (False, 'aine', '', 3),
        (False, 'also erroneously', '', 4),
        (False, 'also identified with the', '', 2),
        (False, 'also identified with', '', 2),
        (False, 'archbishop', '', 7),
        (False, 'atelier', '', 7),
        (False, 'baron', '', 7),
        (False, 'cadet', '', 3),
        (False, 'cardinal', '', 7),
        (False, 'circle of', '', 5),
        (False, 'circle', '', 5),
        (False, 'class of', '', 5),
        (False, 'conde de', '', 7),
        (False, 'countess', '', 7),
        (False, 'count', '', 7),
        (False, "d'", " d'", 15),
        (False, 'dai', '', 15),
        (False, "dall'", " dall'", 15),
        (False, 'dalla', '', 15),
        (False, 'dalle', '', 15),
        (False, 'dal', '', 15),
        (False, 'da', '', 15),
        (False, 'degli', '', 15),
        (False, 'della', '', 15),
        (False, 'del', '', 15),
        (False, 'den', '', 15),
        (False, 'der altere', '', 3),
        (False, 'der jungere', '', 3),
        (False, 'der', '', 15),
        (False, 'de la', '', 15),
        (False, 'des', '', 15),
        (False, "de'", " de'", 15),
        (False, 'de', '', 15),
        (False, 'di ser', '', 7),
        (False, 'di', '', 15),
        (False, 'dos', '', 15),
        (False, 'du', '', 15),
        (False, 'duke of', '', 7),
        (False, 'earl of', '', 7),
        (False, 'el', '', 15),
        (False, 'fils', '', 3),
        (False, 'florentine follower of', '', 5),
        (False, 'follower of', '', 5),
        (False, 'fra', '', 7),
        (False, 'freiherr von', '', 7),
        (False, 'giovane', '', 7),
        (False, 'group', '', 5),
        (True, 'iii', '', 3),
        (True, 'ii', '', 3),
        (False, 'il giovane', '', 7),
        (False, 'il vecchio', '', 7),
        (False, 'il', '', 15),
        (False, "in't", '', 7),
        (False, 'in het', '', 7),
        (True, 'iv', '', 3),
        (True, 'ix', '', 3),
        (True, 'i', '', 3),
        (False, 'jr.', '', 3),
        (False, 'jr', '', 3),
        (False, 'juniore', '', 3),
        (False, 'junior', '', 3),
        (False, 'king of', '', 7),
        (False, "l'", " l'", 15),
        (False, "l'aine", '', 3),
        (False, 'la', '', 15),
        (False, 'le jeune', '', 3),
        (False, 'le', '', 15),
        (False, 'lo', '', 15),
        (False, 'maestro', '', 7),
        (False, 'maitre', '', 7),
        (False, 'marchioness', '', 7),
        (False, 'markgrafin von', '', 7),
        (False, 'marquess', '', 7),
        (False, 'marquis', '', 7),
        (False, 'master of the', '', 7),
        (False, 'master of', '', 7),
        (False, 'master known as the', '', 7),
        (False, 'master with the', '', 7),
        (False, 'master with', '', 7),
        (False, 'masters', '', 7),
        (False, 'master', '', 7),
        (False, 'meister', '', 7),
        (False, 'met de', '', 7),
        (False, 'met', '', 7),
        (False, 'mlle.', '', 7),
        (False, 'mlle', '', 7),
        (False, 'monogrammist', '', 7),
        (False, 'monsu', '', 7),
        (False, 'nee', '', 2),
        (False, 'of', '', 3),
        (False, 'oncle', '', 3),
        (False, 'op den', '', 15),
        (False, 'op de', '', 15),
        (False, 'or', '', 2),
        (False, 'over den', '', 15),
        (False, 'over de', '', 15),
        (False, 'over', '', 7),
        (False, 'p.re', '', 7),
        (False, 'p.r.a.', '', 1),
        (False, 'padre', '', 7),
        (False, 'painter', '', 7),
        (False, 'pere', '', 3),
        (False, 'possibly identified with', '', 6),
        (False, 'possibly', '', 6),
        (False, 'pseudo', '', 15),
        (False, 'r.a.', '', 1),
        (False, 'reichsgraf von', '', 7),
        (False, 'ritter von', '', 7),
        (False, 'sainte-', ' sainte-', 8),
        (False, 'sainte', '', 7),
        (False, 'saint-', ' saint-', 8),
        (False, 'saint', '', 7),
        (False, 'santa', '', 15),
        (False, "sant'", " sant'", 15),
        (False, 'san', '', 15),
        (False, 'ser', '', 7),
        (False, 'seniore', '', 3),
        (False, 'senior', '', 3),
        (False, 'sir', '', 5),
        (False, 'sr.', '', 3),
        (False, 'sr', '', 3),
        (False, 'ss.', ' ss.', 14),
        (False, 'ss', '', 6),
        (False, 'st-', ' st-', 8),
        (False, 'st.', ' st.', 15),
        (False, 'ste-', ' ste-', 8),
        (False, 'ste.', ' ste.', 15),
        (False, 'studio', '', 7),
        (False, 'sub-group', '', 5),
        (False, 'sultan of', '', 7),
        (False, 'ten', '', 15),
        (False, 'ter', '', 15),
        (False, 'the elder', '', 3),
        (False, 'the younger', '', 3),
        (False, 'the', '', 7),
        (False, 'tot', '', 15),
        (False, 'unidentified', '', 1),
        (False, 'van den', '', 15),
        (False, 'van der', '', 15),
        (False, 'van de', '', 15),
        (False, 'vanden', '', 15),
        (False, 'vander', '', 15),
        (False, 'van', '', 15),
        (False, 'vecchia', '', 7),
        (False, 'vecchio', '', 7),
        (True, 'viii', '', 3),
        (True, 'vii', '', 3),
        (True, 'vi', '', 3),
        (True, 'v', '', 3),
        (False, 'vom', '', 7),
        (False, 'von', '', 15),
        (False, 'workshop', '', 7),
        (True, 'xiii', '', 3),
        (True, 'xii', '', 3),
        (True, 'xiv', '', 3),
        (True, 'xix', '', 3),
        (True, 'xi', '', 3),
        (True, 'xviii', '', 3),
        (True, 'xvii', '', 3),
        (True, 'xvi', '', 3),
        (True, 'xv', '', 3),
        (True, 'xx', '', 3),
        (True, 'x', '', 3),
        (False, 'y', '', 7),
    )

    # Bit flags used in the ``method`` column of the special table.
    _method_dict = {
        'end': 1,
        'middle': 2,
        'beginning': 4,
        'beginning_no_space': 8,
    }

    # Fill field 0 (qualifier)
    _qual_3 = {
        'adaptation after',
        'after',
        'assistant of',
        'assistants of',
        'circle of',
        'follower of',
        'imitator of',
        'in the style of',
        'manner of',
        'pupil of',
        'school of',
        'studio of',
        'style of',
        'workshop of',
    }
    _qual_2 = {'copy after', 'copy after?', 'copy of'}
    _qual_1 = {
        'ascribed to',
        'attributed to or copy after',
        'attributed to',
        'possibly',
    }

    # Fill field 2 (generation)
    _gen_1 = (
        'the elder',
        ' sr.',
        ' sr',
        'senior',
        'der altere',
        'il vecchio',
        "l'aine",
        'p.re',
        'padre',
        'seniore',
        'vecchia',
        'vecchio',
    )
    _gen_2 = (
        ' jr.',
        ' jr',
        'der jungere',
        'il giovane',
        'giovane',
        'juniore',
        'junior',
        'le jeune',
        'the younger',
    )

    @staticmethod
    def _roman_check(numeral, fname, lname):
        """Move a Roman numeral from the first name to the last name.

        The numeral is only moved when its first occurrence in ``fname`` is
        either at the very end of ``fname`` or directly followed by a space
        or a comma. If the numeral does not occur in ``fname`` at all (for
        instance because it was matched in the last name), both names are
        returned unchanged apart from whitespace stripping.

        Parameters
        ----------
        numeral : str
            Roman numeral
        fname : str
            First name
        lname : str
            Last name

        Returns
        -------
        tuple
            First and last names with Roman numeral moved

        """
        loc = fname.find(numeral)
        # Guard on loc first: indexing fname relative to a failed find()
        # (loc == -1) would inspect an unrelated character or run past the
        # end of the string.
        if loc != -1:
            numeral_end = loc + len(numeral)
            if numeral_end == len(fname) or fname[numeral_end] in {' ', ','}:
                lname = ' '.join((lname, numeral))
                fname = ' '.join(
                    (fname[:loc].strip(), fname[numeral_end:].lstrip(' ,'))
                )
        return fname.strip(), lname.strip()

    def fingerprint(self, lname, fname='', qual='', normalize=0):
        """Build the Synoname toolcode.

        Parameters
        ----------
        lname : str
            Last name
        fname : str
            First name (can be blank)
        qual : str
            Qualifier
        normalize : int
            Normalization mode (0, 1, or 2). Any non-zero value moves the
            part of ``lname`` after its first comma to the front of
            ``fname``. Mode 2 additionally moves elder/younger markers and
            Roman numerals from ``fname`` to ``lname``.

        Returns
        -------
        tuple
            The transformed names and the synoname toolcode

        Examples
        --------
        >>> st = SynonameToolcode()
        >>> st.fingerprint('hat')
        ('hat', '', '0000000003$$h')
        >>> st.fingerprint('niall')
        ('niall', '', '0000000005$$n')
        >>> st.fingerprint('colin')
        ('colin', '', '0000000005$$c')
        >>> st.fingerprint('atcg')
        ('atcg', '', '0000000004$$a')
        >>> st.fingerprint('entreatment')
        ('entreatment', '', '0000000011$$e')

        >>> st.fingerprint('Ste.-Marie', 'Count John II', normalize=2)
        ('ste.-marie ii', 'count john', '0200491310$015b049a127c$smcji')
        >>> st.fingerprint('Michelangelo IV', '', 'Workshop of')
        ('michelangelo iv', '', '3000550015$055b$mi')

        """
        lname = lname.lower()
        fname = fname.lower()
        qual = qual.lower()

        # Start with the basic code
        toolcode = ['0', '0', '0', '000', '00', '00', '$', '', '$', '']

        # full_name is built from the names as supplied; the normalization
        # steps below change lname/fname but not full_name.
        full_name = ' '.join((lname, fname))

        # Fill field 0 (qualifier)
        if qual in self._qual_3:
            toolcode[0] = '3'
        elif qual in self._qual_2:
            toolcode[0] = '2'
        elif qual in self._qual_1:
            toolcode[0] = '1'

        # Fill field 1 (punctuation)
        if '.' in full_name:
            toolcode[1] = '2'
        elif any(
            punct in full_name for punct in ',-/:;"&\'()!{|}?$%*+<=>[\\]^_`~'
        ):
            toolcode[1] = '1'

        # Fill field 2 (generation); elder markers take priority over
        # younger markers.
        elderyounger = ''  # save elder/younger for possible movement later
        for gen in self._gen_1:
            if gen in full_name:
                toolcode[2] = '1'
                elderyounger = gen
                break
        else:
            for gen in self._gen_2:
                if gen in full_name:
                    toolcode[2] = '2'
                    elderyounger = gen
                    break

        # do comma flip
        if normalize:
            comma = lname.find(',')
            if comma != -1:
                # lstrip tolerates a last name that ends in a comma (nothing
                # left to move), which previously raised IndexError.
                lname_end = lname[comma + 1 :].lstrip(' ,')
                if lname_end:
                    fname = lname_end + ' ' + fname
                lname = lname[:comma].strip()

        # do elder/younger move
        if normalize == 2 and elderyounger:
            elderyounger_loc = fname.find(elderyounger)
            if elderyounger_loc != -1:
                lname = ' '.join((lname, elderyounger.strip()))
                fname = ' '.join(
                    (
                        fname[:elderyounger_loc].strip(),
                        fname[elderyounger_loc + len(elderyounger) :],
                    )
                ).strip()

        # Fill fields 4 and 5 (name lengths). These are taken before any
        # Roman numeral is moved below.
        toolcode[4] = '{:02d}'.format(len(fname))
        toolcode[5] = '{:02d}'.format(len(lname))

        # strip punctuation (periods and apostrophes are kept)
        for char in ',/:;"&()!{|}?$%*+<=>[\\]^_`~':
            full_name = full_name.replace(char, '')
        # Hyphens become spaces, except in the sequence 'b-g'. The
        # replacement keeps the string length, so positions stay valid.
        for pos, char in enumerate(full_name):
            if char == '-' and full_name[pos - 1 : pos + 2] != 'b-g':
                full_name = full_name[:pos] + ' ' + full_name[pos + 1 :]

        # Fill field 9 (search range)
        for word in full_name.split():
            letter = word[0]
            if letter not in toolcode[9]:
                toolcode[9] += letter
            if len(toolcode[9]) == 15:
                break

        initial_abbrs = ('i.', 'v.', 'x.')

        # Fill fields 7 (specials) and 3 (roman numerals)
        for num, special in enumerate(self._synoname_special_table):
            roman, match, extra, method = special
            code = '{:03d}'.format(num)

            if method & self._method_dict['end']:
                match_context = ' ' + match
                loc = full_name.find(match_context)
                if (len(full_name) > len(match_context)) and (
                    loc == len(full_name) - len(match_context)
                ):
                    # A Roman numeral is skipped when the first name
                    # contains 'i.', 'v.' or 'x.'.
                    if not (
                        roman and any(abbr in fname for abbr in initial_abbrs)
                    ):
                        full_name = full_name[:loc]
                        toolcode[7] += code + 'a'
                        if roman:
                            if toolcode[3] == '000':
                                toolcode[3] = code
                            if normalize == 2:
                                fname, lname = self._roman_check(
                                    match, fname, lname
                                )

            if method & self._method_dict['middle']:
                match_context = ' ' + match + ' '
                loc = 0
                while loc != -1:
                    loc = full_name.find(match_context, loc + 1)
                    if loc > 0 and not (
                        roman and any(abbr in fname for abbr in initial_abbrs)
                    ):
                        # Drop the leading space and the match, keeping the
                        # trailing space as the word separator.
                        full_name = (
                            full_name[:loc] + full_name[loc + len(match) + 1 :]
                        )
                        toolcode[7] += code + 'b'
                        if roman:
                            if toolcode[3] == '000':
                                toolcode[3] = code
                            if normalize == 2:
                                fname, lname = self._roman_check(
                                    match, fname, lname
                                )

            if method & self._method_dict['beginning']:
                match_context = match + ' '
                loc = full_name.find(match_context)
                if loc == 0:
                    full_name = full_name[len(match) + 1 :]
                    toolcode[7] += code + 'c'

            if method & self._method_dict['beginning_no_space']:
                loc = full_name.find(match)
                if loc == 0:
                    toolcode[7] += code + 'd'
                    if full_name[: len(match)] not in toolcode[9]:
                        toolcode[9] += full_name[: len(match)]

            if extra:
                loc = full_name.find(extra)
                if loc != -1:
                    toolcode[7] += code + 'X'
                    # Since extras are unique, we only look for each of them
                    # once, and they include otherwise impossible characters
                    # for this field, so no membership test against
                    # toolcode[9] is made before appending.
                    # NOTE(review): the slice length is len(match), not
                    # len(extra), so the last character of the extra is
                    # not copied - confirm this is intended.
                    toolcode[9] += full_name[loc : loc + len(match)]

        return lname, fname, ''.join(toolcode)
493
494
495 1
def synoname_toolcode(lname, fname='', qual='', normalize=0):
    """Build the Synoname toolcode.

    This is a wrapper for :py:meth:`SynonameToolcode.fingerprint`: it
    creates a fresh :py:class:`SynonameToolcode` and forwards all four
    arguments to it.

    Parameters
    ----------
    lname : str
        Last name
    fname : str
        First name (can be blank)
    qual : str
        Qualifier
    normalize : int
        Normalization mode (0, 1, or 2)

    Returns
    -------
    tuple
        The transformed names and the synoname toolcode

    Examples
    --------
    >>> synoname_toolcode('hat')
    ('hat', '', '0000000003$$h')
    >>> synoname_toolcode('niall')
    ('niall', '', '0000000005$$n')
    >>> synoname_toolcode('colin')
    ('colin', '', '0000000005$$c')
    >>> synoname_toolcode('atcg')
    ('atcg', '', '0000000004$$a')
    >>> synoname_toolcode('entreatment')
    ('entreatment', '', '0000000011$$e')

    >>> synoname_toolcode('Ste.-Marie', 'Count John II', normalize=2)
    ('ste.-marie ii', 'count john', '0200491310$015b049a127c$smcji')
    >>> synoname_toolcode('Michelangelo IV', '', 'Workshop of')
    ('michelangelo iv', '', '3000550015$055b$mi')

    """
    encoder = SynonameToolcode()
    return encoder.fingerprint(
        lname, fname=fname, qual=qual, normalize=normalize
    )
536
537
538
if __name__ == '__main__':
    # Run the doctests embedded in this module's docstrings when the file
    # is executed as a script.
    from doctest import testmod

    testmod()
542