Passed
Push — master ( 416c2f...9ec382 )
by Chris
01:03 queued 13s
created

abydos.distance._synoname.Synoname.dist_abs()   A

Complexity

Conditions 1

Size

Total Lines 29
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 9
CRAP Score 1

Importance

Changes 0
Metric Value
eloc 5
dl 0
loc 29
ccs 9
cts 9
cp 1
rs 10
c 0
b 0
f 0
cc 1
nop 3
crap 1
1
# Copyright 2018-2020 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.distance._synoname.

Synoname.
"""
21
22
from collections.abc import Iterable
from typing import (
    Any,
    Dict,
    Iterable as TIterable,
    List,
    Optional,
    Tuple,
    Union,
    cast,
)

from ._distance import _Distance
from ._levenshtein import Levenshtein
from ._ratcliff_obershelp import RatcliffObershelp

# noinspection PyProtectedMember
from ..fingerprint._synoname_toolcode import SynonameToolcode
40
41 1
__all__ = ['Synoname']
42
43 1
44
class Synoname(_Distance):
    """Synoname.

    Cf. :cite:`Getty:1991,Gross:1991`

    .. versionadded:: 0.3.6
    """

    # Helper objects are class attributes, i.e. shared by every instance.
    # sim_type() reassigns _lev._cost and _lev._mode before each use.
    _lev = Levenshtein()
    _ratcliff_obershelp = RatcliffObershelp()
    _stc = SynonameToolcode()

    # Match type names, indexed by the integer value returned for them.
    # Index 0 is an empty placeholder and the last entry means "no match".
    _match_name = (
        '',
        'exact',
        'omission',
        'substitution',
        'transposition',
        'punctuation',
        'initials',
        'extension',
        'inclusion',
        'no_first',
        'word_approx',
        'confusions',
        'char_approx',
        'no_match',
    )
    # Reverse lookup: match type name -> integer value.
    _match_type_dict = {val: n for n, val in enumerate(_match_name)}
    # Bit flag for each selectable test ('exact' = 1, 'omission' = 2, ...).
    # Derived from _match_name (minus the placeholder and 'no_match') so the
    # two listings of test names cannot drift apart.
    _test_dict = {
        key: 2 ** n for n, key in enumerate(_match_name[1:-1])
    }  # type: Dict[str, int]
93 1
94
    def _synoname_strip_punct(self, word: str) -> str:
95
        """Return a word with punctuation stripped out.
96
97
        Parameters
98
        ----------
99
        word : str
100
            A word to strip punctuation from
101
102
        Returns
103
        -------
104
        str
105
            The word stripped of punctuation
106
107
        Examples
108
        --------
109
        >>> pe = Synoname()
110
        >>> pe._synoname_strip_punct('AB;CD EF-GH$IJ')
111
        'ABCD EFGHIJ'
112
113
114
        .. versionadded:: 0.3.0
115
        .. versionchanged:: 0.3.6
116
            Encapsulated in class
117
118 1
        """
119 1
        stripped = ''
120 1
        for char in word:
121 1
            if char not in set(',-./:;"&\'()!{|}?$%*+<=>[\\]^_`~'):
122 1
                stripped += char
123
        return stripped.strip()
124 1
125
    def _synoname_word_approximation(
126
        self,
127
        src_ln: str,
128
        tar_ln: str,
129
        src_fn: str = '',
130
        tar_fn: str = '',
131
        features: Optional[
132
            Dict[str, Union[bool, List[Tuple[int, str]]]]
133
        ] = None,
134
    ) -> float:
135
        """Return the Synoname word approximation score for two names.
136
137
        Parameters
138
        ----------
139
        src_ln : str
140
            Last name of the source
141
        tar_ln : str
142
            Last name of the target
143
        src_fn : str
144
            First name of the source (optional)
145
        tar_fn : str
146
            First name of the target (optional)
147
        features : dict
148
            A dict containing special features calculated using
149
            :py:class:`fingerprint.SynonameToolcode` (optional)
150
151
        Returns
152
        -------
153
        float
154
            The word approximation score
155
156
        Examples
157
        --------
158
        >>> pe = Synoname()
159
        >>> pe._synoname_word_approximation('Smith Waterman', 'Waterman',
160
        ... 'Tom Joe Bob', 'Tom Joe')
161 1
        0.6
162 1
163 1
164 1
        .. versionadded:: 0.3.0
165 1
        .. versionchanged:: 0.3.6
166 1
            Encapsulated in class
167
168 1
        """
169 1
        if features is None:
170
            features = {}
171
        if 'src_specials' not in features:
172 1
            features['src_specials'] = []
173
        if 'tar_specials' not in features:
174
            features['tar_specials'] = []
175 1
176
        src_len_specials = len(
177
            cast(List[Tuple[int, str]], features['src_specials'])
178 1
        )
179 1
        tar_len_specials = len(
180 1
            cast(List[Tuple[int, str]], features['tar_specials'])
181 1
        )
182
183
        # 1
184
        if ('gen_conflict' in features and features['gen_conflict']) or (
185
            'roman_conflict' in features and features['roman_conflict']
186
        ):
187
            return 0
188
189
        # 3 & 7
190
        full_tar1 = ' '.join((tar_ln, tar_fn)).replace('-', ' ').strip()
191 1
        for s_pos, s_type in cast(
192 1
            List[Tuple[int, str]], features['tar_specials']
193
        ):
194
            if s_type == 'a':
195
                full_tar1 = full_tar1[
196
                    : -(
197
                        1
198
                        + len(
199
                            self._stc._synoname_special_table[  # noqa: SF01
200
                                s_pos
201
                            ][1]
202 1
                        )
203
                    )
204
                ]
205
            elif s_type == 'b':
206
                loc = (
207
                    full_tar1.find(
208
                        ' '
209
                        + self._stc._synoname_special_table[  # noqa: SF01
210
                            s_pos
211
                        ][1]
212
                        + ' '
213 1
                    )
214 1
                    + 1
215
                )
216
                full_tar1 = (
217
                    full_tar1[:loc]
218
                    + full_tar1[
219
                        loc
220
                        + len(
221
                            self._stc._synoname_special_table[  # noqa: SF01
222
                                s_pos
223 1
                            ][1]
224 1
                        ) :
225 1
                    ]
226 1
                )
227
            elif s_type == 'c':
228
                full_tar1 = full_tar1[
229
                    1
230
                    + len(
231
                        self._stc._synoname_special_table[s_pos][  # noqa: SF01
232
                            1
233
                        ]
234
                    ) :
235
                ]
236 1
237 1
        full_src1 = ' '.join((src_ln, src_fn)).replace('-', ' ').strip()
238
        for s_pos, s_type in cast(
239
            List[Tuple[int, str]], features['src_specials']
240
        ):
241
            if s_type == 'a':
242
                full_src1 = full_src1[
243
                    : -(
244
                        1
245
                        + len(
246
                            self._stc._synoname_special_table[  # noqa: SF01
247 1
                                s_pos
248
                            ][1]
249
                        )
250
                    )
251
                ]
252
            elif s_type == 'b':
253
                loc = (
254
                    full_src1.find(
255
                        ' '
256
                        + self._stc._synoname_special_table[  # noqa: SF01
257
                            s_pos
258 1
                        ][1]
259 1
                        + ' '
260
                    )
261
                    + 1
262
                )
263
                full_src1 = (
264
                    full_src1[:loc]
265
                    + full_src1[
266
                        loc
267
                        + len(
268 1
                            self._stc._synoname_special_table[  # noqa: SF01
269 1
                                s_pos
270 1
                            ][1]
271 1
                        ) :
272
                    ]
273
                )
274
            elif s_type == 'c':
275
                full_src1 = full_src1[
276
                    1
277
                    + len(
278 1
                        self._stc._synoname_special_table[s_pos][  # noqa: SF01
279
                            1
280
                        ]
281
                    ) :
282
                ]
283 1
284
        full_tar2 = full_tar1
285
        for s_pos, s_type in cast(
286
            List[Tuple[int, str]], features['tar_specials']
287 1
        ):
288
            if s_type == 'd':
289
                full_tar2 = full_tar2[
290
                    len(
291
                        self._stc._synoname_special_table[s_pos][  # noqa: SF01
292
                            1
293
                        ]
294
                    ) :
295
                ]
296
            elif (
297
                s_type == 'X'
298
                and self._stc._synoname_special_table[s_pos][1]  # noqa: SF01
299 1
                in full_tar2
300 1
            ):
301 1
                loc = full_tar2.find(
302 1
                    ' '
303
                    + self._stc._synoname_special_table[s_pos][1]  # noqa: SF01
304
                )
305
                full_tar2 = (
306
                    full_tar2[:loc]
307
                    + full_tar2[
308
                        loc
309 1
                        + len(
310
                            self._stc._synoname_special_table[  # noqa: SF01
311
                                s_pos
312
                            ][1]
313
                        ) :
314 1
                    ]
315
                )
316
317
        full_src2 = full_src1
318 1
        for s_pos, s_type in cast(
319
            List[Tuple[int, str]], features['src_specials']
320
        ):
321
            if s_type == 'd':
322
                full_src2 = full_src2[
323
                    len(
324
                        self._stc._synoname_special_table[s_pos][  # noqa: SF01
325
                            1
326
                        ]
327
                    ) :
328
                ]
329
            elif (
330 1
                s_type == 'X'
331 1
                and self._stc._synoname_special_table[s_pos][1]  # noqa: SF01
332 1
                in full_src2
333
            ):
334 1
                loc = full_src2.find(
335 1
                    ' '
336 1
                    + self._stc._synoname_special_table[s_pos][1]  # noqa: SF01
337
                )
338 1
                full_src2 = (
339 1
                    full_src2[:loc]
340 1
                    + full_src2[
341
                        loc
342 1
                        + len(
343 1
                            self._stc._synoname_special_table[  # noqa: SF01
344 1
                                s_pos
345
                            ][1]
346
                        ) :
347 1
                    ]
348
                )
349
350
        full_tar1 = self._synoname_strip_punct(full_tar1)
351
        tar1_words = full_tar1.split()
352
        tar1_num_words = len(tar1_words)
353 1
354
        full_src1 = self._synoname_strip_punct(full_src1)
355
        src1_words = full_src1.split()
356 1
        src1_num_words = len(src1_words)
357
358
        full_tar2 = self._synoname_strip_punct(full_tar2)
359
        tar2_words = full_tar2.split()
360
        tar2_num_words = len(tar2_words)
361 1
362 1
        full_src2 = self._synoname_strip_punct(full_src2)
363 1
        src2_words = full_src2.split()
364
        src2_num_words = len(src2_words)
365
366 1
        # 2
367 1
        if (
368 1
            src1_num_words < 2
369 1
            and src_len_specials == 0
370
            and src2_num_words < 2
371 1
            and tar_len_specials == 0
372 1
        ):
373 1
            return 0
374 1
375
        # 4
376
        if (
377 1
            tar1_num_words == 1
378 1
            and src1_num_words == 1
379 1
            and tar1_words[0] == src1_words[0]
380 1
        ):
381 1
            return 1
382 1
        if tar1_num_words < 2 and tar_len_specials == 0:
383 1
            return 0
384 1
385 1
        # 5
386 1
        last_found = False
387
        for word in tar1_words:
388
            if src_ln.endswith(word) or word + ' ' in src_ln:
389
                last_found = True
390
391
        if not last_found:
392 1
            for word in src1_words:
393
                if tar_ln.endswith(word) or word + ' ' in tar_ln:
394
                    last_found = True
395 1
396
        # 6
397
        matches = 0
398
        if last_found:
399
            for i, s_word in enumerate(src1_words):
400 1
                for j, t_word in enumerate(tar1_words):
401
                    if s_word == t_word:
402
                        src1_words[i] = '@'
403
                        tar1_words[j] = '@'
404
                        matches += 1
405
        w_ratio = matches / max(tar1_num_words, src1_num_words)
406
        if matches > 1 or (
407 1
            matches == 1
408 1
            and src1_num_words == 1
409 1
            and tar1_num_words == 1
410 1
            and (tar_len_specials > 0 or src_len_specials > 0)
411
        ):
412 1
            return w_ratio
413 1
414 1
        # 8
415 1
        if (
416
            tar2_num_words == 1
417 1
            and src2_num_words == 1
418 1
            and tar2_words[0] == src2_words[0]
419
        ):
420
            return 1
421 1
        # I see no way that the following can be True if the equivalent in
422 1
        # #4 was False.
423 1
        if tar2_num_words < 2 and tar_len_specials == 0:  # pragma: no cover
424 1
            return 0
425 1
426 1
        # 9
427 1
        last_found = False
428 1
        for word in tar2_words:
429 1
            if src_ln.endswith(word) or word + ' ' in src_ln:
430 1
                last_found = True
431
432
        if not last_found:
433
            for word in src2_words:
434
                if tar_ln.endswith(word) or word + ' ' in tar_ln:
435
                    last_found = True
436
437
        if not last_found:
438 1
            return 0
439
440 1
        # 10
441
        matches = 0
442
        if last_found:
443
            for i, s_word in enumerate(src2_words):
444
                for j, t_word in enumerate(tar2_words):
445
                    if s_word == t_word:
446
                        src2_words[i] = '@'
447
                        tar2_words[j] = '@'
448
                        matches += 1
449
        w_ratio = matches / max(tar2_num_words, src2_num_words)
450
        if matches > 1 or (
451
            matches == 1
452
            and src2_num_words == 1
453
            and tar2_num_words == 1
454
            and (tar_len_specials > 0 or src_len_specials > 0)
455
        ):
456
            return w_ratio
457
458
        return 0
459
460
    def __init__(
461
        self,
462
        word_approx_min: float = 0.3,
463
        char_approx_min: float = 0.73,
464
        tests: Union[int, TIterable[str]] = 2 ** 12 - 1,
465
        ret_name: bool = False,
466
        **kwargs: Any
467
    ) -> None:
468
        """Initialize Synoname instance.
469
470 1
        Parameters
471 1
        ----------
472 1
        word_approx_min : float
473 1
            The minimum word approximation value to signal a 'word_approx'
474
            match
475 1
        char_approx_min : float
476 1
            The minimum character approximation value to signal a 'char_approx'
477 1
            match
478 1
        tests : int or Iterable
479 1
            Either an integer indicating tests to perform or a list of test
480 1
            names to perform (defaults to performing all tests)
481 1
        ret_name : bool
482
            If True, returns the match name rather than its integer equivalent
483 1
        **kwargs
484
            Arbitrary keyword arguments
485
486
487
        .. versionadded:: 0.4.0
488
489
        """
490
        super(Synoname, self).__init__(**kwargs)
491
        self._word_approx_min = word_approx_min
492
        self._char_approx_min = char_approx_min
493
        self._ret_name = ret_name
494
495
        if isinstance(tests, Iterable):
496
            self._tests = 0
497
            for term in tests:
498
                if term in self._test_dict:
499
                    self._tests += self._test_dict[term]
500
        else:
501
            self._tests = tests
502
503
    def dist_abs(
504
        self,
505
        src: Union[str, Tuple[str, str, str]],
506
        tar: Union[str, Tuple[str, str, str]],
507
    ) -> int:
508
        """Return the Synoname similarity type of two words.
509
510
        Parameters
511
        ----------
512
        src : str
513
            Source string for comparison
514
        tar : str
515
            Target string for comparison
516
517
        Returns
518
        -------
519
        int
520
            Synoname value
521 1
522 1
        Examples
523 1
        --------
524 1
        >>> cmp = Synoname()
525
        >>> cmp.dist_abs(('Breghel', 'Pieter', ''), ('Brueghel', 'Pieter', ''))
526 1
        2
527
528 1
        .. versionadded:: 0.6.0
529 1
530 1
        """
531 1
        return cast(int, self.sim_type(src, tar, True))
532
533 1
    def sim_type(
534
        self,
535 1
        src: Union[str, Tuple[str, str, str]],
536 1
        tar: Union[str, Tuple[str, str, str]],
537 1
        force_numeric: bool = False,
538 1
    ) -> Union[int, str]:
539 1
        """Return the Synoname similarity type of two words.
540 1
541
        Parameters
542 1
        ----------
543 1
        src : str
544 1
            Source string for comparison
545 1
        tar : str
546
            Target string for comparison
547
        force_numeric : bool
548
            Overrides the instance's ret_name setting
549
550 1
        Returns
551 1
        -------
552 1
        int (or str if ret_name is True)
553
            Synoname value
554 1
555 1
        Examples
556 1
        --------
557
        >>> cmp = Synoname()
558
        >>> cmp.sim_type(('Breghel', 'Pieter', ''), ('Brueghel', 'Pieter', ''))
559 1
        2
560
561
        >>> cmp = Synoname(ret_name=True)
562 1
        >>> cmp.sim_type(('Breghel', 'Pieter', ''), ('Brueghel', 'Pieter', ''))
563
        'omission'
564
        >>> cmp.sim_type(('Dore', 'Gustave', ''),
565
        ... ('Dore', 'Paul Gustave Louis Christophe', ''))
566 1
        'inclusion'
567 1
        >>> cmp.sim_type(('Pereira', 'I. R.', ''), ('Pereira', 'I. Smith', ''))
568 1
        'word_approx'
569 1
570 1
571
        .. versionadded:: 0.3.0
572 1
        .. versionchanged:: 0.3.6
573 1
            Encapsulated in class
574 1
        .. versionchanged:: 0.6.0
575 1
            Renamed dist_abs to sim_type and added dist_abs with standard
576 1
            interface
577
578 1
        """
579
        if isinstance(src, tuple):
580
            src_ln, src_fn, src_qual = src
581 1
        elif '#' in src:
582
            src_ln, src_fn, src_qual = src.split('#')[-3:]
583
        else:
584
            src_ln, src_fn, src_qual = src, '', ''
585 1
586 1
        if isinstance(tar, tuple):
587
            tar_ln, tar_fn, tar_qual = tar
588
        elif '#' in tar:
589 1
            tar_ln, tar_fn, tar_qual = tar.split('#')[-3:]
590 1
        else:
591 1
            tar_ln, tar_fn, tar_qual = tar, '', ''
592
593 1
        def _split_special(spec: str) -> List[Tuple[int, str]]:
594 1
            spec_list = []
595 1
            while spec:
596 1
                spec_list.append((int(spec[:3]), spec[3:4]))
597
                spec = spec[4:]
598
            return spec_list
599
600
        def _fmt_retval(val: int) -> Union[int, str]:
601
            if self._ret_name and not force_numeric:
602
                return self._match_name[val]
603 1
            return val
604 1
605
        # 1. Preprocessing
606 1
607 1
        # Lowercasing
608 1
        src_fn = src_fn.strip().lower()
609 1
        src_ln = src_ln.strip().lower()
610
        src_qual = src_qual.strip().lower()
611
612
        tar_fn = tar_fn.strip().lower()
613
        tar_ln = tar_ln.strip().lower()
614
        tar_qual = tar_qual.strip().lower()
615
616 1
        # Create toolcodes
617 1
        src_ln, src_fn, src_tc = self._stc.fingerprint_tuple(
618
            src_ln, src_fn, src_qual
619 1
        )
620 1
        tar_ln, tar_fn, tar_tc = self._stc.fingerprint_tuple(
621
            tar_ln, tar_fn, tar_qual
622 1
        )
623
624 1
        src_generation = int(src_tc[2])
625 1
        src_romancode = int(src_tc[3:6])
626 1
        src_len_fn = int(src_tc[6:8])
627 1
        src_specials = _split_special(src_tc.split('$')[1])
628
629
        tar_generation = int(tar_tc[2])
630
        tar_romancode = int(tar_tc[3:6])
631 1
        tar_len_fn = int(tar_tc[6:8])
632 1
        tar_specials = _split_special(tar_tc.split('$')[1])
633 1
634
        gen_conflict = (src_generation != tar_generation) and bool(
635
            src_generation or tar_generation
636
        )
637 1
        roman_conflict = (src_romancode != tar_romancode) and bool(
638 1
            src_romancode or tar_romancode
639 1
        )
640
641
        ln_equal = src_ln == tar_ln
642
        fn_equal = src_fn == tar_fn
643 1
644 1
        # approx_c
645
        def _approx_c() -> Tuple[bool, float]:
646
            if gen_conflict or roman_conflict:
647
                return False, 0.0
648 1
649 1
            full_src = ' '.join((src_ln, src_fn))
650 1
            if full_src.startswith('master '):
651
                full_src = full_src[len('master ') :]
652
                for intro in [
653
                    'of the ',
654 1
                    'of ',
655 1
                    'known as the ',
656
                    'with the ',
657
                    'with ',
658
                ]:
659 1
                    if full_src.startswith(intro):
660 1
                        full_src = full_src[len(intro) :]
661 1
662 1
            full_tar = ' '.join((tar_ln, tar_fn))
663 1
            if full_tar.startswith('master '):
664 1
                full_tar = full_tar[len('master ') :]
665
                for intro in [
666 1
                    'of the ',
667 1
                    'of ',
668
                    'known as the ',
669 1
                    'with the ',
670 1
                    'with ',
671 1
                ]:
672 1
                    if full_tar.startswith(intro):
673
                        full_tar = full_tar[len(intro) :]
674 1
675 1
            loc_ratio = self._ratcliff_obershelp.sim(full_src, full_tar)
676
            return loc_ratio >= self._char_approx_min, loc_ratio
677 1
678 1
        approx_c_result, ca_ratio = _approx_c()
679 1
680 1
        if self._tests & self._test_dict['exact'] and fn_equal and ln_equal:
681 1
            return _fmt_retval(self._match_type_dict['exact'])
682
        if self._tests & self._test_dict['omission']:
683
            self._lev._cost = (1, 1, 99, 99)  # noqa: SF01
684
            self._lev._mode = 'lev'  # noqa: SF01
685 1
            if fn_equal and self._lev.dist_abs(src_ln, tar_ln) == 1:
686 1
                if not roman_conflict:
687 1
                    return _fmt_retval(self._match_type_dict['omission'])
688 1
            elif ln_equal and self._lev.dist_abs(src_fn, tar_fn) == 1:
689 1
                return _fmt_retval(self._match_type_dict['omission'])
690 1
        if self._tests & self._test_dict['substitution']:
691 1
            self._lev._cost = (99, 99, 1, 99)  # noqa: SF01
692
            self._lev._mode = 'lev'  # noqa: SF01
693
            if fn_equal and self._lev.dist_abs(src_ln, tar_ln) == 1:
694
                return _fmt_retval(self._match_type_dict['substitution'])
695
            elif ln_equal and self._lev.dist_abs(src_fn, tar_fn) == 1:
696
                return _fmt_retval(self._match_type_dict['substitution'])
697
        if self._tests & self._test_dict['transposition']:
698
            self._lev._cost = (99, 99, 99, 1)  # noqa: SF01
699
            self._lev._mode = 'osa'  # noqa: SF01
700
            if fn_equal and (self._lev.dist_abs(src_ln, tar_ln) == 1):
701
                return _fmt_retval(self._match_type_dict['transposition'])
702
            elif ln_equal and (self._lev.dist_abs(src_fn, tar_fn) == 1):
703
                return _fmt_retval(self._match_type_dict['transposition'])
704
        if self._tests & self._test_dict['punctuation']:
705
            np_src_fn = self._synoname_strip_punct(src_fn)
706
            np_tar_fn = self._synoname_strip_punct(tar_fn)
707
            np_src_ln = self._synoname_strip_punct(src_ln)
708
            np_tar_ln = self._synoname_strip_punct(tar_ln)
709 1
710 1
            if (np_src_fn == np_tar_fn) and (np_src_ln == np_tar_ln):
711 1
                return _fmt_retval(self._match_type_dict['punctuation'])
712
713
            np_src_fn = self._synoname_strip_punct(src_fn.replace('-', ' '))
714 1
            np_tar_fn = self._synoname_strip_punct(tar_fn.replace('-', ' '))
715
            np_src_ln = self._synoname_strip_punct(src_ln.replace('-', ' '))
716
            np_tar_ln = self._synoname_strip_punct(tar_ln.replace('-', ' '))
717
718
            if (np_src_fn == np_tar_fn) and (np_src_ln == np_tar_ln):
719 1
                return _fmt_retval(self._match_type_dict['punctuation'])
720 1
721 1
        if self._tests & self._test_dict['initials'] and ln_equal:
722 1
            if src_fn and tar_fn:
723 1
                src_initials = self._synoname_strip_punct(src_fn).split()
724 1
                tar_initials = self._synoname_strip_punct(tar_fn).split()
725 1
                initials = bool(
726 1
                    (len(src_initials) == len(''.join(src_initials)))
727 1
                    or (len(tar_initials) == len(''.join(tar_initials)))
728
                )
729
                if initials:
730
                    src_initials_str = ''.join(_[0] for _ in src_initials)
731
                    tar_initials_str = ''.join(_[0] for _ in tar_initials)
732
                    if src_initials_str == tar_initials_str:
733
                        return _fmt_retval(self._match_type_dict['initials'])
734
                    initial_diff = abs(
735
                        len(src_initials_str) - len(tar_initials_str)
736
                    )
737
                    self._lev._cost = (1, 99, 99, 99)  # noqa: SF01
738
                    self._lev._mode = 'lev'  # noqa: SF01
739 1
                    if initial_diff and (
740 1
                        (
741
                            initial_diff
742
                            == self._lev.dist_abs(
743
                                src_initials_str, tar_initials_str,
744 1
                            )
745 1
                        )
746 1
                        or (
747 1
                            initial_diff
748 1
                            == self._lev.dist_abs(
749 1
                                src_initials_str, tar_initials_str,
750 1
                            )
751
                        )
752 1
                    ):
753
                        return _fmt_retval(self._match_type_dict['initials'])
754
        if self._tests & self._test_dict['extension']:
755
            if src_ln[1:2] == tar_ln[1:2] and (
756
                src_ln.startswith(tar_ln) or tar_ln.startswith(src_ln)
757
            ):
758
                if (
759
                    (not src_len_fn and not tar_len_fn)
760
                    or (tar_fn and src_fn.startswith(tar_fn))
761
                    or (src_fn and tar_fn.startswith(src_fn))
762
                ) and not roman_conflict:
763
                    return _fmt_retval(self._match_type_dict['extension'])
764
        if self._tests & self._test_dict['inclusion'] and ln_equal:
765
            if (src_fn and src_fn in tar_fn) or (tar_fn and tar_fn in src_ln):
766
                return _fmt_retval(self._match_type_dict['inclusion'])
767
        if self._tests & self._test_dict['no_first'] and ln_equal:
768
            if src_fn == '' or tar_fn == '':
769
                return _fmt_retval(self._match_type_dict['no_first'])
770
        if self._tests & self._test_dict['word_approx']:
771
            ratio = self._synoname_word_approximation(
772
                src_ln,
773 1
                tar_ln,
774
                src_fn,
775
                tar_fn,
776 1
                {
777
                    'gen_conflict': gen_conflict,
778
                    'roman_conflict': roman_conflict,
779
                    'src_specials': src_specials,
780
                    'tar_specials': tar_specials,
781
                },
782 1
            )
783
            if ratio == 1 and self._tests & self._test_dict['confusions']:
784
                if (
785
                    ' '.join((src_fn, src_ln)).strip()
786
                    == ' '.join((tar_fn, tar_ln)).strip()
787
                ):
788
                    return _fmt_retval(self._match_type_dict['confusions'])
789
            if ratio >= self._word_approx_min:
790
                return _fmt_retval(self._match_type_dict['word_approx'])
791
        if self._tests & self._test_dict['char_approx']:
792
            if ca_ratio >= self._char_approx_min:
793
                return _fmt_retval(self._match_type_dict['char_approx'])
794
        return _fmt_retval(self._match_type_dict['no_match'])
795
796
    def dist(self, src: str, tar: str) -> float:
797
        """Return the normalized Synoname distance between two words.
798
799
        Parameters
800
        ----------
801
        src : str
802
            Source string for comparison
803
        tar : str
804
            Target string for comparison
805
806
        Returns
807
        -------
808
        float
809
            Normalized Synoname distance
810
811
812
        .. versionadded:: 0.3.0
813
        .. versionchanged:: 0.3.6
814
            Encapsulated in class
815
816
        """
817
        return self.dist_abs(src, tar) / 14
818
819
820
# Run the doctests embedded in this module when it is executed as a script.
if __name__ == '__main__':
    import doctest

    doctest.testmod()
824