Passed
Push — master ( 416c2f...9ec382 )
by Chris
01:03 queued 13s
created

abydos.fingerprint._synoname_toolcode   B

Complexity

Total Complexity 52

Size/Duplication

Total Lines 555
Duplicated Lines 0 %

Test Coverage

Coverage 100%

Importance

Changes 0
Metric Value
wmc 52
eloc 357
dl 0
loc 555
ccs 126
cts 126
cp 1
rs 7.44
c 0
b 0
f 0

2 Methods

Rating   Name   Duplication   Size   Complexity  
A SynonameToolcode.fingerprint() 0 51 1
F SynonameToolcode.fingerprint_tuple() 0 232 51

How to fix   Complexity   

Complexity

Complex classes like abydos.fingerprint._synoname_toolcode often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
# Copyright 2018-2020 by Christopher C. Little.
2
# This file is part of Abydos.
3
#
4
# Abydos is free software: you can redistribute it and/or modify
5
# it under the terms of the GNU General Public License as published by
6
# the Free Software Foundation, either version 3 of the License, or
7
# (at your option) any later version.
8
#
9
# Abydos is distributed in the hope that it will be useful,
10
# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
# GNU General Public License for more details.
13
#
14
# You should have received a copy of the GNU General Public License
15
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
16
17
"""abydos.fingerprint._synoname_toolcode.
18
19 1
Synoname toolcode
20
"""
21
22
from typing import Tuple
23
24 1
from ._fingerprint import _Fingerprint
25
26
__all__ = ['SynonameToolcode']
27
28
29
class SynonameToolcode(_Fingerprint):
    """Synoname Toolcode.

    Cf. :cite:`Getty:1991,Gross:1991`.

    The toolcode is assembled from ten fields that are concatenated in
    order:

    ===== ================================================================
    Field Content
    ===== ================================================================
    0     qualifier class (``'0'``-``'3'``)
    1     punctuation (``'2'`` if a period occurs, ``'1'`` for other
          punctuation, else ``'0'``)
    2     generation (``'1'`` elder, ``'2'`` younger, else ``'0'``)
    3     table index of the first Roman numeral matched (3 digits)
    4     length of the first name (at least 2 digits)
    5     length of the last name (at least 2 digits)
    6     the separator ``'$'``
    7     specials: table index plus a letter for each match
    8     the separator ``'$'``
    9     search range: distinct initial letters of the name parts
    ===== ================================================================

    .. versionadded:: 0.3.6
    """

    # Each row is (Roman, match, extra, method):
    #   Roman  -- True if the entry is a Roman numeral
    #   match  -- the text to look for in the name
    #   extra  -- an additional pattern searched anywhere in the name
    #   method -- bitmask of the positions (see _method_dict) to test
    # The position of a row in this table is what gets written to the
    # toolcode, so rows must not be reordered.
    _synoname_special_table = (
        # Roman, match, extra, method
        (False, 'NONE', '', 0),
        (False, 'aine', '', 3),
        (False, 'also erroneously', '', 4),
        (False, 'also identified with the', '', 2),
        (False, 'also identified with', '', 2),
        (False, 'archbishop', '', 7),
        (False, 'atelier', '', 7),
        (False, 'baron', '', 7),
        (False, 'cadet', '', 3),
        (False, 'cardinal', '', 7),
        (False, 'circle of', '', 5),
        (False, 'circle', '', 5),
        (False, 'class of', '', 5),
        (False, 'conde de', '', 7),
        (False, 'countess', '', 7),
        (False, 'count', '', 7),
        (False, "d'", " d'", 15),
        (False, 'dai', '', 15),
        (False, "dall'", " dall'", 15),
        (False, 'dalla', '', 15),
        (False, 'dalle', '', 15),
        (False, 'dal', '', 15),
        (False, 'da', '', 15),
        (False, 'degli', '', 15),
        (False, 'della', '', 15),
        (False, 'del', '', 15),
        (False, 'den', '', 15),
        (False, 'der altere', '', 3),
        (False, 'der jungere', '', 3),
        (False, 'der', '', 15),
        (False, 'de la', '', 15),
        (False, 'des', '', 15),
        (False, "de'", " de'", 15),
        (False, 'de', '', 15),
        (False, 'di ser', '', 7),
        (False, 'di', '', 15),
        (False, 'dos', '', 15),
        (False, 'du', '', 15),
        (False, 'duke of', '', 7),
        (False, 'earl of', '', 7),
        (False, 'el', '', 15),
        (False, 'fils', '', 3),
        (False, 'florentine follower of', '', 5),
        (False, 'follower of', '', 5),
        (False, 'fra', '', 7),
        (False, 'freiherr von', '', 7),
        (False, 'giovane', '', 7),
        (False, 'group', '', 5),
        (True, 'iii', '', 3),
        (True, 'ii', '', 3),
        (False, 'il giovane', '', 7),
        (False, 'il vecchio', '', 7),
        (False, 'il', '', 15),
        (False, "in't", '', 7),
        (False, 'in het', '', 7),
        (True, 'iv', '', 3),
        (True, 'ix', '', 3),
        (True, 'i', '', 3),
        (False, 'jr.', '', 3),
        (False, 'jr', '', 3),
        (False, 'juniore', '', 3),
        (False, 'junior', '', 3),
        (False, 'king of', '', 7),
        (False, "l'", " l'", 15),
        (False, "l'aine", '', 3),
        (False, 'la', '', 15),
        (False, 'le jeune', '', 3),
        (False, 'le', '', 15),
        (False, 'lo', '', 15),
        (False, 'maestro', '', 7),
        (False, 'maitre', '', 7),
        (False, 'marchioness', '', 7),
        (False, 'markgrafin von', '', 7),
        (False, 'marquess', '', 7),
        (False, 'marquis', '', 7),
        (False, 'master of the', '', 7),
        (False, 'master of', '', 7),
        (False, 'master known as the', '', 7),
        (False, 'master with the', '', 7),
        (False, 'master with', '', 7),
        (False, 'masters', '', 7),
        (False, 'master', '', 7),
        (False, 'meister', '', 7),
        (False, 'met de', '', 7),
        (False, 'met', '', 7),
        (False, 'mlle.', '', 7),
        (False, 'mlle', '', 7),
        (False, 'monogrammist', '', 7),
        (False, 'monsu', '', 7),
        (False, 'nee', '', 2),
        (False, 'of', '', 3),
        (False, 'oncle', '', 3),
        (False, 'op den', '', 15),
        (False, 'op de', '', 15),
        (False, 'or', '', 2),
        (False, 'over den', '', 15),
        (False, 'over de', '', 15),
        (False, 'over', '', 7),
        (False, 'p.re', '', 7),
        (False, 'p.r.a.', '', 1),
        (False, 'padre', '', 7),
        (False, 'painter', '', 7),
        (False, 'pere', '', 3),
        (False, 'possibly identified with', '', 6),
        (False, 'possibly', '', 6),
        (False, 'pseudo', '', 15),
        (False, 'r.a.', '', 1),
        (False, 'reichsgraf von', '', 7),
        (False, 'ritter von', '', 7),
        (False, 'sainte-', ' sainte-', 8),
        (False, 'sainte', '', 7),
        (False, 'saint-', ' saint-', 8),
        (False, 'saint', '', 7),
        (False, 'santa', '', 15),
        (False, "sant'", " sant'", 15),
        (False, 'san', '', 15),
        (False, 'ser', '', 7),
        (False, 'seniore', '', 3),
        (False, 'senior', '', 3),
        (False, 'sir', '', 5),
        (False, 'sr.', '', 3),
        (False, 'sr', '', 3),
        (False, 'ss.', ' ss.', 14),
        (False, 'ss', '', 6),
        (False, 'st-', ' st-', 8),
        (False, 'st.', ' st.', 15),
        (False, 'ste-', ' ste-', 8),
        (False, 'ste.', ' ste.', 15),
        (False, 'studio', '', 7),
        (False, 'sub-group', '', 5),
        (False, 'sultan of', '', 7),
        (False, 'ten', '', 15),
        (False, 'ter', '', 15),
        (False, 'the elder', '', 3),
        (False, 'the younger', '', 3),
        (False, 'the', '', 7),
        (False, 'tot', '', 15),
        (False, 'unidentified', '', 1),
        (False, 'van den', '', 15),
        (False, 'van der', '', 15),
        (False, 'van de', '', 15),
        (False, 'vanden', '', 15),
        (False, 'vander', '', 15),
        (False, 'van', '', 15),
        (False, 'vecchia', '', 7),
        (False, 'vecchio', '', 7),
        (True, 'viii', '', 3),
        (True, 'vii', '', 3),
        (True, 'vi', '', 3),
        (True, 'v', '', 3),
        (False, 'vom', '', 7),
        (False, 'von', '', 15),
        (False, 'workshop', '', 7),
        (True, 'xiii', '', 3),
        (True, 'xii', '', 3),
        (True, 'xiv', '', 3),
        (True, 'xix', '', 3),
        (True, 'xi', '', 3),
        (True, 'xviii', '', 3),
        (True, 'xvii', '', 3),
        (True, 'xvi', '', 3),
        (True, 'xv', '', 3),
        (True, 'xx', '', 3),
        (True, 'x', '', 3),
        (False, 'y', '', 7),
    )

    # Bit flags combined in the "method" column of the special table
    _method_dict = {
        'end': 1,
        'middle': 2,
        'beginning': 4,
        'beginning_no_space': 8,
    }

    # Fill field 0 (qualifier)
    _qual_3 = {
        'adaptation after',
        'after',
        'assistant of',
        'assistants of',
        'circle of',
        'follower of',
        'imitator of',
        'in the style of',
        'manner of',
        'pupil of',
        'school of',
        'studio of',
        'style of',
        'workshop of',
    }
    _qual_2 = {'copy after', 'copy after?', 'copy of'}
    _qual_1 = {
        'ascribed to',
        'attributed to or copy after',
        'attributed to',
        'possibly',
    }

    # Fill field 2 (generation); tuples, because the first hit in order wins
    _gen_1 = (
        'the elder',
        ' sr.',
        ' sr',
        'senior',
        'der altere',
        'il vecchio',
        "l'aine",
        'p.re',
        'padre',
        'seniore',
        'vecchia',
        'vecchio',
    )
    _gen_2 = (
        ' jr.',
        ' jr',
        'der jungere',
        'il giovane',
        'giovane',
        'juniore',
        'junior',
        'le jeune',
        'the younger',
    )

    # Punctuation that sets field 1 to '1' when no period is present
    _punct_any = ',-/:;"&\'()!{|}?$%*+<=>[\\]^_`~'
    # Punctuation deleted from the working name before specials are matched
    # (periods, hyphens and apostrophes are deliberately not in this list)
    _punct_strip = ',/:;"&()!{|}?$%*+<=>[\\]^_`~'
    # Abbreviation markers that stop a Roman numeral match from being applied
    _roman_abbrs = ('i.', 'v.', 'x.')

    def fingerprint(
        self, lname: str, fname: str = '', qual: str = '', normalize: int = 0
    ) -> str:
        """Build the Synoname toolcode.

        Parameters
        ----------
        lname : str
            Last name
        fname : str
            First name (can be blank)
        qual : str
            Qualifier
        normalize : int
            Normalization mode (0, 1, or 2)

        Returns
        -------
        str
            The transformed names and the synoname toolcode, separated by
            commas

        Examples
        --------
        >>> st = SynonameToolcode()
        >>> st.fingerprint('hat')
        'hat,,0000000003$$h'
        >>> st.fingerprint('niall')
        'niall,,0000000005$$n'
        >>> st.fingerprint('colin')
        'colin,,0000000005$$c'
        >>> st.fingerprint('atcg')
        'atcg,,0000000004$$a'
        >>> st.fingerprint('entreatment')
        'entreatment,,0000000011$$e'

        >>> st.fingerprint('Ste.-Marie', 'Count John II', normalize=2)
        'ste.-marie ii,count john,0200491310$015b049a127c$smcji'
        >>> st.fingerprint('Michelangelo IV', '', 'Workshop of')
        'michelangelo iv,,3000550015$055b$mi'


        .. versionadded:: 0.3.0
        .. versionchanged:: 0.3.6
            Encapsulated in class
        .. versionchanged:: 0.6.0
            Changed to return a comma-separated string instead of 3-tuple of
            strs

        """
        return ','.join(self.fingerprint_tuple(lname, fname, qual, normalize))

    @staticmethod
    def _roman_check(numeral: str, fname: str, lname: str) -> Tuple[str, str]:
        """Move a Roman numeral from the first name to the last name.

        The numeral is moved only if it occurs in ``fname`` and is followed
        by the end of the string, a space, or a comma. Otherwise both names
        are returned unchanged apart from whitespace stripping.

        Parameters
        ----------
        numeral : str
            Roman numeral
        fname : str
            First name
        lname : str
            Last name

        Returns
        -------
        tuple
            First and last names with Roman numeral moved


        .. versionadded:: 0.3.0

        """
        loc = fname.find(numeral)
        # The numeral may have been matched in the last name only; in that
        # case there is nothing to move (and indexing past loc is invalid).
        if loc != -1:
            end = loc + len(numeral)
            if end == len(fname) or fname[end] in {' ', ','}:
                lname = ' '.join((lname, numeral))
                fname = ' '.join(
                    (fname[:loc].strip(), fname[end:].lstrip(' ,'))
                )
        return fname.strip(), lname.strip()

    def fingerprint_tuple(
        self, lname: str, fname: str = '', qual: str = '', normalize: int = 0
    ) -> Tuple[str, str, str]:
        """Build the Synoname toolcode.

        Parameters
        ----------
        lname : str
            Last name
        fname : str
            First name (can be blank)
        qual : str
            Qualifier
        normalize : int
            Normalization mode (0, 1, or 2). Any non-zero value moves the
            part of the last name that follows a comma to the front of the
            first name; 2 additionally moves elder/younger markers and Roman
            numerals from the first name to the end of the last name.

        Returns
        -------
        tuple
            The transformed names and the synoname toolcode

        Examples
        --------
        >>> st = SynonameToolcode()
        >>> st.fingerprint_tuple('hat')
        ('hat', '', '0000000003$$h')
        >>> st.fingerprint_tuple('niall')
        ('niall', '', '0000000005$$n')
        >>> st.fingerprint_tuple('colin')
        ('colin', '', '0000000005$$c')
        >>> st.fingerprint_tuple('atcg')
        ('atcg', '', '0000000004$$a')
        >>> st.fingerprint_tuple('entreatment')
        ('entreatment', '', '0000000011$$e')

        >>> st.fingerprint_tuple('Ste.-Marie', 'Count John II', normalize=2)
        ('ste.-marie ii', 'count john', '0200491310$015b049a127c$smcji')
        >>> st.fingerprint_tuple('Michelangelo IV', '', 'Workshop of')
        ('michelangelo iv', '', '3000550015$055b$mi')


        .. versionadded:: 0.6.0

        """
        lname = lname.lower()
        fname = fname.lower()
        qual = qual.lower()

        # Start with the basic code
        toolcode = ['0', '0', '0', '000', '00', '00', '$', '', '$', '']

        # Working copy of the whole name; specials are cut out of it as they
        # are matched, while lname/fname are only changed by normalization.
        full_name = ' '.join((lname, fname))

        # Fill field 0 (qualifier)
        if qual in self._qual_3:
            toolcode[0] = '3'
        elif qual in self._qual_2:
            toolcode[0] = '2'
        elif qual in self._qual_1:
            toolcode[0] = '1'

        # Fill field 1 (punctuation)
        if '.' in full_name:
            toolcode[1] = '2'
        elif any(punct in full_name for punct in self._punct_any):
            toolcode[1] = '1'

        # Fill field 2 (generation); elder markers take priority over
        # younger ones, and the first marker found is saved for possible
        # movement later
        elderyounger = ''
        for gen_code, markers in (('1', self._gen_1), ('2', self._gen_2)):
            elderyounger = next(
                (gen for gen in markers if gen in full_name), ''
            )
            if elderyounger:
                toolcode[2] = gen_code
                break

        # do comma flip
        if normalize:
            comma = lname.find(',')
            if comma != -1:
                # lstrip rather than indexing, so that a last name with
                # nothing after the comma (e.g. 'smith,') does not raise
                lname_end = lname[comma + 1 :].lstrip(' ,')
                if lname_end:
                    fname = lname_end + ' ' + fname
                lname = lname[:comma].strip()

        # do elder/younger move
        if normalize == 2 and elderyounger:
            elderyounger_loc = fname.find(elderyounger)
            if elderyounger_loc != -1:
                lname = ' '.join((lname, elderyounger.strip()))
                fname = ' '.join(
                    (
                        fname[:elderyounger_loc].strip(),
                        fname[elderyounger_loc + len(elderyounger) :],
                    )
                ).strip()

        # Fill fields 4 and 5 (name lengths, measured before any Roman
        # numeral is moved)
        toolcode[4] = '{:02d}'.format(len(fname))
        toolcode[5] = '{:02d}'.format(len(lname))

        # strip punctuation
        full_name = full_name.translate(
            str.maketrans('', '', self._punct_strip)
        )
        # Hyphens become spaces, except in the sequence 'b-g'.
        # NOTE(review): the reason for the 'b-g' exception is not visible
        # here; presumably inherited from the reference implementation.
        for pos, char in enumerate(full_name):
            if char == '-' and full_name[pos - 1 : pos + 2] != 'b-g':
                full_name = full_name[:pos] + ' ' + full_name[pos + 1 :]

        # Fill field 9 (search range)
        for word in full_name.split():
            if word[0] not in toolcode[9]:
                toolcode[9] += word[0]
            if len(toolcode[9]) == 15:
                break

        # Fill fields 7 (specials) and 3 (roman numerals)
        methods = self._method_dict
        for num, special in enumerate(self._synoname_special_table):
            roman, match, extra, method = special
            code = '{:03d}'.format(num)

            if method & methods['end']:
                match_context = ' ' + match
                # NOTE: find() reports the first occurrence, so a name that
                # contains the pattern twice does not count as an end match.
                loc = full_name.find(match_context)
                if (len(full_name) > len(match_context)) and (
                    loc == len(full_name) - len(match_context)
                ):
                    # Roman numerals are skipped when the first name holds
                    # an abbreviation such as 'i.', 'v.' or 'x.'
                    if not roman or not any(
                        abbr in fname for abbr in self._roman_abbrs
                    ):
                        full_name = full_name[:loc]
                        toolcode[7] += code + 'a'
                        if roman:
                            if toolcode[3] == '000':
                                toolcode[3] = code
                            if normalize == 2:
                                fname, lname = self._roman_check(
                                    match, fname, lname
                                )

            if method & methods['middle']:
                match_context = ' ' + match + ' '
                loc = 0
                while loc != -1:
                    loc = full_name.find(match_context, loc + 1)
                    if loc > 0 and (
                        not roman
                        or not any(
                            abbr in fname for abbr in self._roman_abbrs
                        )
                    ):
                        # Drop the match and one of its surrounding spaces
                        full_name = (
                            full_name[:loc]
                            + full_name[loc + len(match) + 1 :]
                        )
                        toolcode[7] += code + 'b'
                        if roman:
                            if toolcode[3] == '000':
                                toolcode[3] = code
                            if normalize == 2:
                                fname, lname = self._roman_check(
                                    match, fname, lname
                                )

            if method & methods['beginning']:
                if full_name.startswith(match + ' '):
                    full_name = full_name[len(match) + 1 :]
                    toolcode[7] += code + 'c'

            if method & methods['beginning_no_space']:
                if full_name.startswith(match):
                    toolcode[7] += code + 'd'
                    if full_name[: len(match)] not in toolcode[9]:
                        toolcode[9] += full_name[: len(match)]

            if extra:
                loc = full_name.find(extra)
                if loc != -1:
                    toolcode[7] += code + 'X'
                    # Since extras are unique, we only look for each of them
                    # once, and they include otherwise impossible characters
                    # for this field, so no membership test is needed before
                    # appending.
                    # NOTE(review): the slice length is len(match), not
                    # len(extra); kept as-is to preserve existing output.
                    toolcode[9] += full_name[loc : loc + len(match)]

        return lname, fname, ''.join(toolcode)
549
550
551
if __name__ == '__main__':
    # When executed as a script, run the doctests embedded in this module.
    from doctest import testmod

    testmod()
555