Passed
Push — master ( c2a3b6...15a61d )
by Chris
01:00 queued 14s
created

abydos.distance._synoname.synoname()   A

Complexity

Conditions 1

Size

Total Lines 60
Code Lines 15

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 3
CRAP Score 1

Importance

Changes 0
Metric Value
eloc 15
dl 0
loc 60
ccs 3
cts 3
cp 1
rs 9.65
c 0
b 0
f 0
cc 1
nop 6
crap 1

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method.

1
# Copyright 2018-2020 by Christopher C. Little.
2
# This file is part of Abydos.
3
#
4
# Abydos is free software: you can redistribute it and/or modify
5
# it under the terms of the GNU General Public License as published by
6
# the Free Software Foundation, either version 3 of the License, or
7
# (at your option) any later version.
8
#
9
# Abydos is distributed in the hope that it will be useful,
10
# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
# GNU General Public License for more details.
13
#
14
# You should have received a copy of the GNU General Public License
15
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
16
17
"""abydos.distance._synoname.
18
19 1
Synoname.
20
"""
21
22
from collections.abc import Iterable
23
24 1
from ._distance import _Distance
25
from ._levenshtein import Levenshtein
26
from ._ratcliff_obershelp import RatcliffObershelp
27
28
# noinspection PyProtectedMember
29
from ..fingerprint._synoname_toolcode import SynonameToolcode
30
31 1
__all__ = ['Synoname']
32
33 1
34
class Synoname(_Distance):
    """Synoname.

    Compares two personal names and classifies their relationship as one of
    the match types listed in ``_match_name``.

    Cf. :cite:`Getty:1991,Gross:1991`

    .. versionadded:: 0.3.6

    """

    # Helper objects created once at class level, so every instance shares
    # them.
    _lev = Levenshtein()
    _ratcliff_obershelp = RatcliffObershelp()

    _stc = SynonameToolcode()

    # Maps each test name to its own bit (1, 2, 4, ...); the ``tests``
    # setting is a bitmask built from these values.
    _test_dict = {
        val: 2 ** n
        for n, val in enumerate(
            (
                'exact',
                'omission',
                'substitution',
                'transposition',
                'punctuation',
                'initials',
                'extension',
                'inclusion',
                'no_first',
                'word_approx',
                'confusions',
                'char_approx',
            )
        )
    }
    # Match-type names indexed by their integer match value; index 0 is the
    # empty name and the final entry is 'no_match'.
    _match_name = (
        '',
        'exact',
        'omission',
        'substitution',
        'transposition',
        'punctuation',
        'initials',
        'extension',
        'inclusion',
        'no_first',
        'word_approx',
        'confusions',
        'char_approx',
        'no_match',
    )
    # Inverse of _match_name: match-type name -> integer match value.
    _match_type_dict = {val: n for n, val in enumerate(_match_name)}
83
84
    def _synoname_strip_punct(self, word):
85
        """Return a word with punctuation stripped out.
86
87
        Parameters
88
        ----------
89
        word : str
90
            A word to strip punctuation from
91 1
92
        Returns
93 1
        -------
94
        str
95
            The word stripped of punctuation
96
97
        Examples
98
        --------
99
        >>> pe = Synoname()
100
        >>> pe._synoname_strip_punct('AB;CD EF-GH$IJ')
101
        'ABCD EFGHIJ'
102
103
104
        .. versionadded:: 0.3.0
105
        .. versionchanged:: 0.3.6
106
            Encapsulated in class
107
108
        """
109
        stripped = ''
110
        for char in word:
111
            if char not in set(',-./:;"&\'()!{|}?$%*+<=>[\\]^_`~'):
112
                stripped += char
113
        return stripped.strip()
114
115
    def _synoname_word_approximation(
116
        self, src_ln, tar_ln, src_fn='', tar_fn='', features=None
117
    ):
118 1
        """Return the Synoname word approximation score for two names.
119 1
120 1
        Parameters
121 1
        ----------
122 1
        src_ln : str
123
            Last name of the source
124 1
        tar_ln : str
125
            Last name of the target
126
        src_fn : str
127
            First name of the source (optional)
128
        tar_fn : str
129
            First name of the target (optional)
130
        features : dict
131
            A dict containing special features calculated using
132
            :py:class:`fingerprint.SynonameToolcode` (optional)
133
134
        Returns
135
        -------
136
        float
137
            The word approximation score
138
139
        Examples
140
        --------
141
        >>> pe = Synoname()
142
        >>> pe._synoname_word_approximation('Smith Waterman', 'Waterman',
143
        ... 'Tom Joe Bob', 'Tom Joe')
144
        0.6
145
146
147
        .. versionadded:: 0.3.0
148
        .. versionchanged:: 0.3.6
149
            Encapsulated in class
150
151
        """
152
        if features is None:
153
            features = {}
154
        if 'src_specials' not in features:
155
            features['src_specials'] = []
156
        if 'tar_specials' not in features:
157
            features['tar_specials'] = []
158
159
        src_len_specials = len(features['src_specials'])
160
        tar_len_specials = len(features['tar_specials'])
161 1
162 1
        # 1
163 1
        if ('gen_conflict' in features and features['gen_conflict']) or (
164 1
            'roman_conflict' in features and features['roman_conflict']
165 1
        ):
166 1
            return 0
167
168 1
        # 3 & 7
169 1
        full_tar1 = ' '.join((tar_ln, tar_fn)).replace('-', ' ').strip()
170
        for s_pos, s_type in features['tar_specials']:
171
            if s_type == 'a':
172 1
                full_tar1 = full_tar1[
173
                    : -(
174
                        1
175 1
                        + len(
176
                            self._stc._synoname_special_table[  # noqa: SF01
177
                                s_pos
178 1
                            ][1]
179 1
                        )
180 1
                    )
181 1
                ]
182
            elif s_type == 'b':
183
                loc = (
184
                    full_tar1.find(
185
                        ' '
186
                        + self._stc._synoname_special_table[  # noqa: SF01
187
                            s_pos
188
                        ][1]
189
                        + ' '
190
                    )
191 1
                    + 1
192 1
                )
193
                full_tar1 = (
194
                    full_tar1[:loc]
195
                    + full_tar1[
196
                        loc
197
                        + len(
198
                            self._stc._synoname_special_table[  # noqa: SF01
199
                                s_pos
200
                            ][1]
201
                        ) :
202 1
                    ]
203
                )
204
            elif s_type == 'c':
205
                full_tar1 = full_tar1[
206
                    1
207
                    + len(
208
                        self._stc._synoname_special_table[s_pos][  # noqa: SF01
209
                            1
210
                        ]
211
                    ) :
212
                ]
213 1
214 1
        full_src1 = ' '.join((src_ln, src_fn)).replace('-', ' ').strip()
215
        for s_pos, s_type in features['src_specials']:
216
            if s_type == 'a':
217
                full_src1 = full_src1[
218
                    : -(
219
                        1
220
                        + len(
221
                            self._stc._synoname_special_table[  # noqa: SF01
222
                                s_pos
223 1
                            ][1]
224 1
                        )
225 1
                    )
226 1
                ]
227
            elif s_type == 'b':
228
                loc = (
229
                    full_src1.find(
230
                        ' '
231
                        + self._stc._synoname_special_table[  # noqa: SF01
232
                            s_pos
233
                        ][1]
234
                        + ' '
235
                    )
236 1
                    + 1
237 1
                )
238
                full_src1 = (
239
                    full_src1[:loc]
240
                    + full_src1[
241
                        loc
242
                        + len(
243
                            self._stc._synoname_special_table[  # noqa: SF01
244
                                s_pos
245
                            ][1]
246
                        ) :
247 1
                    ]
248
                )
249
            elif s_type == 'c':
250
                full_src1 = full_src1[
251
                    1
252
                    + len(
253
                        self._stc._synoname_special_table[s_pos][  # noqa: SF01
254
                            1
255
                        ]
256
                    ) :
257
                ]
258 1
259 1
        full_tar2 = full_tar1
260
        for s_pos, s_type in features['tar_specials']:
261
            if s_type == 'd':
262
                full_tar2 = full_tar2[
263
                    len(
264
                        self._stc._synoname_special_table[s_pos][  # noqa: SF01
265
                            1
266
                        ]
267
                    ) :
268 1
                ]
269 1
            elif (
270 1
                s_type == 'X'
271 1
                and self._stc._synoname_special_table[s_pos][1]  # noqa: SF01
272
                in full_tar2
273
            ):
274
                loc = full_tar2.find(
275
                    ' '
276
                    + self._stc._synoname_special_table[s_pos][1]  # noqa: SF01
277
                )
278 1
                full_tar2 = (
279
                    full_tar2[:loc]
280
                    + full_tar2[
281
                        loc
282
                        + len(
283 1
                            self._stc._synoname_special_table[  # noqa: SF01
284
                                s_pos
285
                            ][1]
286
                        ) :
287 1
                    ]
288
                )
289
290
        full_src2 = full_src1
291
        for s_pos, s_type in features['src_specials']:
292
            if s_type == 'd':
293
                full_src2 = full_src2[
294
                    len(
295
                        self._stc._synoname_special_table[s_pos][  # noqa: SF01
296
                            1
297
                        ]
298
                    ) :
299 1
                ]
300 1
            elif (
301 1
                s_type == 'X'
302 1
                and self._stc._synoname_special_table[s_pos][1]  # noqa: SF01
303
                in full_src2
304
            ):
305
                loc = full_src2.find(
306
                    ' '
307
                    + self._stc._synoname_special_table[s_pos][1]  # noqa: SF01
308
                )
309 1
                full_src2 = (
310
                    full_src2[:loc]
311
                    + full_src2[
312
                        loc
313
                        + len(
314 1
                            self._stc._synoname_special_table[  # noqa: SF01
315
                                s_pos
316
                            ][1]
317
                        ) :
318 1
                    ]
319
                )
320
321
        full_tar1 = self._synoname_strip_punct(full_tar1)
322
        tar1_words = full_tar1.split()
323
        tar1_num_words = len(tar1_words)
324
325
        full_src1 = self._synoname_strip_punct(full_src1)
326
        src1_words = full_src1.split()
327
        src1_num_words = len(src1_words)
328
329
        full_tar2 = self._synoname_strip_punct(full_tar2)
330 1
        tar2_words = full_tar2.split()
331 1
        tar2_num_words = len(tar2_words)
332 1
333
        full_src2 = self._synoname_strip_punct(full_src2)
334 1
        src2_words = full_src2.split()
335 1
        src2_num_words = len(src2_words)
336 1
337
        # 2
338 1
        if (
339 1
            src1_num_words < 2
340 1
            and src_len_specials == 0
341
            and src2_num_words < 2
342 1
            and tar_len_specials == 0
343 1
        ):
344 1
            return 0
345
346
        # 4
347 1
        if (
348
            tar1_num_words == 1
349
            and src1_num_words == 1
350
            and tar1_words[0] == src1_words[0]
351
        ):
352
            return 1
353 1
        if tar1_num_words < 2 and tar_len_specials == 0:
354
            return 0
355
356 1
        # 5
357
        last_found = False
358
        for word in tar1_words:
359
            if src_ln.endswith(word) or word + ' ' in src_ln:
360
                last_found = True
361 1
362 1
        if not last_found:
363 1
            for word in src1_words:
364
                if tar_ln.endswith(word) or word + ' ' in tar_ln:
365
                    last_found = True
366 1
367 1
        # 6
368 1
        matches = 0
369 1
        if last_found:
370
            for i, s_word in enumerate(src1_words):
371 1
                for j, t_word in enumerate(tar1_words):
372 1
                    if s_word == t_word:
373 1
                        src1_words[i] = '@'
374 1
                        tar1_words[j] = '@'
375
                        matches += 1
376
        w_ratio = matches / max(tar1_num_words, src1_num_words)
377 1
        if matches > 1 or (
378 1
            matches == 1
379 1
            and src1_num_words == 1
380 1
            and tar1_num_words == 1
381 1
            and (tar_len_specials > 0 or src_len_specials > 0)
382 1
        ):
383 1
            return w_ratio
384 1
385 1
        # 8
386 1
        if (
387
            tar2_num_words == 1
388
            and src2_num_words == 1
389
            and tar2_words[0] == src2_words[0]
390
        ):
391
            return 1
392 1
        # I see no way that the following can be True if the equivalent in
393
        # #4 was False.
394
        if tar2_num_words < 2 and tar_len_specials == 0:  # pragma: no cover
395 1
            return 0
396
397
        # 9
398
        last_found = False
399
        for word in tar2_words:
400 1
            if src_ln.endswith(word) or word + ' ' in src_ln:
401
                last_found = True
402
403
        if not last_found:
404
            for word in src2_words:
405
                if tar_ln.endswith(word) or word + ' ' in tar_ln:
406
                    last_found = True
407 1
408 1
        if not last_found:
409 1
            return 0
410 1
411
        # 10
412 1
        matches = 0
413 1
        if last_found:
414 1
            for i, s_word in enumerate(src2_words):
415 1
                for j, t_word in enumerate(tar2_words):
416
                    if s_word == t_word:
417 1
                        src2_words[i] = '@'
418 1
                        tar2_words[j] = '@'
419
                        matches += 1
420
        w_ratio = matches / max(tar2_num_words, src2_num_words)
421 1
        if matches > 1 or (
422 1
            matches == 1
423 1
            and src2_num_words == 1
424 1
            and tar2_num_words == 1
425 1
            and (tar_len_specials > 0 or src_len_specials > 0)
426 1
        ):
427 1
            return w_ratio
428 1
429 1
        return 0
430 1
431
    def __init__(
432
        self,
433
        word_approx_min=0.3,
434
        char_approx_min=0.73,
435
        tests=2 ** 12 - 1,
436
        ret_name=False,
437
        **kwargs
438 1
    ):
439
        """Initialize Synoname instance.
440 1
441
        Parameters
442
        ----------
443
        word_approx_min : float
444
            The minimum word approximation value to signal a 'word_approx'
445
            match
446
        char_approx_min : float
447
            The minimum character approximation value to signal a 'char_approx'
448
            match
449
        tests : int or Iterable
450
            Either an integer indicating tests to perform or a list of test
451
            names to perform (defaults to performing all tests)
452
        ret_name : bool
453
            If True, returns the match name rather than its integer equivalent
454
        **kwargs
455
            Arbitrary keyword arguments
456
457
458
        .. versionadded:: 0.4.0
459
460
        """
461
        super(Synoname, self).__init__(**kwargs)
462
        self._word_approx_min = word_approx_min
463
        self._char_approx_min = char_approx_min
464
        self._ret_name = ret_name
465
466
        self._tests = tests
467
        if isinstance(self._tests, Iterable):
468
            new_tests = 0
469
            for term in self._tests:
470 1
                if term in self._test_dict:
471 1
                    new_tests += self._test_dict[term]
472 1
            self._tests = new_tests
473 1
474
    def dist_abs(self, src, tar, force_numeric=False):
475 1
        """Return the Synoname similarity type of two words.
476 1
477 1
        Parameters
478 1
        ----------
479 1
        src : str
480 1
            Source string for comparison
481 1
        tar : str
482
            Target string for comparison
483 1
        force_numeric : bool
484
            Overrides the instance's ret_name setting
485
486
        Returns
487
        -------
488
        int (or str if ret_name is True)
489
            Synoname value
490
491
        Examples
492
        --------
493
        >>> cmp = Synoname()
494
        >>> cmp.dist_abs(('Breghel', 'Pieter', ''), ('Brueghel', 'Pieter', ''))
495
        2
496
497
        >>> cmp = Synoname(ret_name=True)
498
        >>> cmp.dist_abs(('Breghel', 'Pieter', ''), ('Brueghel', 'Pieter', ''))
499
        'omission'
500
        >>> cmp.dist_abs(('Dore', 'Gustave', ''),
501
        ... ('Dore', 'Paul Gustave Louis Christophe', ''))
502
        'inclusion'
503
        >>> cmp.dist_abs(('Pereira', 'I. R.', ''), ('Pereira', 'I. Smith', ''))
504
        'word_approx'
505
506
507
        .. versionadded:: 0.3.0
508
        .. versionchanged:: 0.3.6
509
            Encapsulated in class
510
511
        """
512
        if isinstance(src, tuple):
513
            src_ln, src_fn, src_qual = src
514
        elif '#' in src:
515
            src_ln, src_fn, src_qual = src.split('#')[-3:]
516
        else:
517
            src_ln, src_fn, src_qual = src, '', ''
518
519
        if isinstance(tar, tuple):
520
            tar_ln, tar_fn, tar_qual = tar
521 1
        elif '#' in tar:
522 1
            tar_ln, tar_fn, tar_qual = tar.split('#')[-3:]
523 1
        else:
524 1
            tar_ln, tar_fn, tar_qual = tar, '', ''
525
526 1
        def _split_special(spec):
527
            spec_list = []
528 1
            while spec:
529 1
                spec_list.append((int(spec[:3]), spec[3:4]))
530 1
                spec = spec[4:]
531 1
            return spec_list
532
533 1
        def _fmt_retval(val):
534
            if self._ret_name and not force_numeric:
535 1
                return self._match_name[val]
536 1
            return val
537 1
538 1
        # 1. Preprocessing
539 1
540 1
        # Lowercasing
541
        src_fn = src_fn.strip().lower()
542 1
        src_ln = src_ln.strip().lower()
543 1
        src_qual = src_qual.strip().lower()
544 1
545 1
        tar_fn = tar_fn.strip().lower()
546
        tar_ln = tar_ln.strip().lower()
547
        tar_qual = tar_qual.strip().lower()
548
549
        # Create toolcodes
550 1
        src_ln, src_fn, src_tc = self._stc.fingerprint(
551 1
            src_ln, src_fn, src_qual
552 1
        )
553
        tar_ln, tar_fn, tar_tc = self._stc.fingerprint(
554 1
            tar_ln, tar_fn, tar_qual
555 1
        )
556 1
557
        src_generation = int(src_tc[2])
558
        src_romancode = int(src_tc[3:6])
559 1
        src_len_fn = int(src_tc[6:8])
560
        src_tc = src_tc.split('$')
561
        src_specials = _split_special(src_tc[1])
562 1
563
        tar_generation = int(tar_tc[2])
564
        tar_romancode = int(tar_tc[3:6])
565
        tar_len_fn = int(tar_tc[6:8])
566 1
        tar_tc = tar_tc.split('$')
567 1
        tar_specials = _split_special(tar_tc[1])
568 1
569 1
        gen_conflict = (src_generation != tar_generation) and bool(
570 1
            src_generation or tar_generation
571
        )
572 1
        roman_conflict = (src_romancode != tar_romancode) and bool(
573 1
            src_romancode or tar_romancode
574 1
        )
575 1
576 1
        ln_equal = src_ln == tar_ln
577
        fn_equal = src_fn == tar_fn
578 1
579
        # approx_c
580
        def _approx_c():
581 1
            if gen_conflict or roman_conflict:
582
                return False, 0
583
584
            full_src = ' '.join((src_ln, src_fn))
585 1
            if full_src.startswith('master '):
586 1
                full_src = full_src[len('master ') :]
587
                for intro in [
588
                    'of the ',
589 1
                    'of ',
590 1
                    'known as the ',
591 1
                    'with the ',
592
                    'with ',
593 1
                ]:
594 1
                    if full_src.startswith(intro):
595 1
                        full_src = full_src[len(intro) :]
596 1
597
            full_tar = ' '.join((tar_ln, tar_fn))
598
            if full_tar.startswith('master '):
599
                full_tar = full_tar[len('master ') :]
600
                for intro in [
601
                    'of the ',
602
                    'of ',
603 1
                    'known as the ',
604 1
                    'with the ',
605
                    'with ',
606 1
                ]:
607 1
                    if full_tar.startswith(intro):
608 1
                        full_tar = full_tar[len(intro) :]
609 1
610
            loc_ratio = self._ratcliff_obershelp.sim(full_src, full_tar)
611
            return loc_ratio >= self._char_approx_min, loc_ratio
612
613
        approx_c_result, ca_ratio = _approx_c()
614
615
        if self._tests & self._test_dict['exact'] and fn_equal and ln_equal:
616 1
            return _fmt_retval(self._match_type_dict['exact'])
617 1
        if self._tests & self._test_dict['omission']:
618
            self._lev._cost = (1, 1, 99, 99)  # noqa: SF01
619 1
            self._lev._mode = 'lev'  # noqa: SF01
620 1
            if fn_equal and self._lev.dist_abs(src_ln, tar_ln) == 1:
621
                if not roman_conflict:
622 1
                    return _fmt_retval(self._match_type_dict['omission'])
623
            elif ln_equal and self._lev.dist_abs(src_fn, tar_fn) == 1:
624 1
                return _fmt_retval(self._match_type_dict['omission'])
625 1
        if self._tests & self._test_dict['substitution']:
626 1
            self._lev._cost = (99, 99, 1, 99)  # noqa: SF01
627 1
            self._lev._mode = 'lev'  # noqa: SF01
628
            if fn_equal and self._lev.dist_abs(src_ln, tar_ln) == 1:
629
                return _fmt_retval(self._match_type_dict['substitution'])
630
            elif ln_equal and self._lev.dist_abs(src_fn, tar_fn) == 1:
631 1
                return _fmt_retval(self._match_type_dict['substitution'])
632 1
        if self._tests & self._test_dict['transposition']:
633 1
            self._lev._cost = (99, 99, 99, 1)  # noqa: SF01
634
            self._lev._mode = 'osa'  # noqa: SF01
635
            if fn_equal and (self._lev.dist_abs(src_ln, tar_ln) == 1):
636
                return _fmt_retval(self._match_type_dict['transposition'])
637 1
            elif ln_equal and (self._lev.dist_abs(src_fn, tar_fn) == 1):
638 1
                return _fmt_retval(self._match_type_dict['transposition'])
639 1
        if self._tests & self._test_dict['punctuation']:
640
            np_src_fn = self._synoname_strip_punct(src_fn)
641
            np_tar_fn = self._synoname_strip_punct(tar_fn)
642
            np_src_ln = self._synoname_strip_punct(src_ln)
643 1
            np_tar_ln = self._synoname_strip_punct(tar_ln)
644 1
645
            if (np_src_fn == np_tar_fn) and (np_src_ln == np_tar_ln):
646
                return _fmt_retval(self._match_type_dict['punctuation'])
647
648 1
            np_src_fn = self._synoname_strip_punct(src_fn.replace('-', ' '))
649 1
            np_tar_fn = self._synoname_strip_punct(tar_fn.replace('-', ' '))
650 1
            np_src_ln = self._synoname_strip_punct(src_ln.replace('-', ' '))
651
            np_tar_ln = self._synoname_strip_punct(tar_ln.replace('-', ' '))
652
653
            if (np_src_fn == np_tar_fn) and (np_src_ln == np_tar_ln):
654 1
                return _fmt_retval(self._match_type_dict['punctuation'])
655 1
656
        if self._tests & self._test_dict['initials'] and ln_equal:
657
            if src_fn and tar_fn:
658
                src_initials = self._synoname_strip_punct(src_fn).split()
659 1
                tar_initials = self._synoname_strip_punct(tar_fn).split()
660 1
                initials = bool(
661 1
                    (len(src_initials) == len(''.join(src_initials)))
662 1
                    or (len(tar_initials) == len(''.join(tar_initials)))
663 1
                )
664 1
                if initials:
665
                    src_initials = ''.join(_[0] for _ in src_initials)
666 1
                    tar_initials = ''.join(_[0] for _ in tar_initials)
667 1
                    if src_initials == tar_initials:
668
                        return _fmt_retval(self._match_type_dict['initials'])
669 1
                    initial_diff = abs(len(src_initials) - len(tar_initials))
670 1
                    self._lev._cost = (1, 99, 99, 99)  # noqa: SF01
671 1
                    self._lev._mode = 'lev'  # noqa: SF01
672 1
                    if initial_diff and (
673
                        (
674 1
                            initial_diff
675 1
                            == self._lev.dist_abs(src_initials, tar_initials,)
676
                        )
677 1
                        or (
678 1
                            initial_diff
679 1
                            == self._lev.dist_abs(tar_initials, src_initials,)
680 1
                        )
681 1
                    ):
682
                        return _fmt_retval(self._match_type_dict['initials'])
683
        if self._tests & self._test_dict['extension']:
684
            if src_ln[1:2] == tar_ln[1:2] and (
685 1
                src_ln.startswith(tar_ln) or tar_ln.startswith(src_ln)
686 1
            ):
687 1
                if (
688 1
                    (not src_len_fn and not tar_len_fn)
689 1
                    or (tar_fn and src_fn.startswith(tar_fn))
690 1
                    or (src_fn and tar_fn.startswith(src_fn))
691 1
                ) and not roman_conflict:
692
                    return _fmt_retval(self._match_type_dict['extension'])
693
        if self._tests & self._test_dict['inclusion'] and ln_equal:
694
            if (src_fn and src_fn in tar_fn) or (tar_fn and tar_fn in src_ln):
695
                return _fmt_retval(self._match_type_dict['inclusion'])
696
        if self._tests & self._test_dict['no_first'] and ln_equal:
697
            if src_fn == '' or tar_fn == '':
698
                return _fmt_retval(self._match_type_dict['no_first'])
699
        if self._tests & self._test_dict['word_approx']:
700
            ratio = self._synoname_word_approximation(
701
                src_ln,
702
                tar_ln,
703
                src_fn,
704
                tar_fn,
705
                {
706
                    'gen_conflict': gen_conflict,
707
                    'roman_conflict': roman_conflict,
708
                    'src_specials': src_specials,
709 1
                    'tar_specials': tar_specials,
710 1
                },
711 1
            )
712
            if ratio == 1 and self._tests & self._test_dict['confusions']:
713
                if (
714 1
                    ' '.join((src_fn, src_ln)).strip()
715
                    == ' '.join((tar_fn, tar_ln)).strip()
716
                ):
717
                    return _fmt_retval(self._match_type_dict['confusions'])
718
            if ratio >= self._word_approx_min:
719 1
                return _fmt_retval(self._match_type_dict['word_approx'])
720 1
        if self._tests & self._test_dict['char_approx']:
721 1
            if ca_ratio >= self._char_approx_min:
722 1
                return _fmt_retval(self._match_type_dict['char_approx'])
723 1
        return _fmt_retval(self._match_type_dict['no_match'])
724 1
725 1
    def dist(self, src, tar):
726 1
        """Return the normalized Synoname distance between two words.
727 1
728
        Parameters
729
        ----------
730
        src : str
731
            Source string for comparison
732
        tar : str
733
            Target string for comparison
734
735
        Returns
736
        -------
737
        float
738
            Normalized Synoname distance
739 1
740 1
741
        .. versionadded:: 0.3.0
742
        .. versionchanged:: 0.3.6
743
            Encapsulated in class
744 1
745 1
        """
746 1
        return self.dist_abs(src, tar, force_numeric=True) / 14
747 1
748 1
749 1
if __name__ == '__main__':
    # Run this module's doctests when it is executed as a script.
    from doctest import testmod

    testmod()
753