| 
                    1
                 | 
                                    
                                                     | 
                
                 | 
                # -*- coding: utf-8 -*-  | 
            
            
                                                        
            
                                    
            
            
                | 
                    2
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    3
                 | 
                                    
                                                     | 
                
                 | 
                # Copyright 2014-2018 by Christopher C. Little.  | 
            
            
                                                        
            
                                    
            
            
                | 
                    4
                 | 
                                    
                                                     | 
                
                 | 
                # This file is part of Abydos.  | 
            
            
                                                        
            
                                    
            
            
                | 
                    5
                 | 
                                    
                                                     | 
                
                 | 
                #  | 
            
            
                                                        
            
                                    
            
            
                | 
                    6
                 | 
                                    
                                                     | 
                
                 | 
                # Abydos is free software: you can redistribute it and/or modify  | 
            
            
                                                        
            
                                    
            
            
                | 
                    7
                 | 
                                    
                                                     | 
                
                 | 
                # it under the terms of the GNU General Public License as published by  | 
            
            
                                                        
            
                                    
            
            
                | 
                    8
                 | 
                                    
                                                     | 
                
                 | 
                # the Free Software Foundation, either version 3 of the License, or  | 
            
            
                                                        
            
                                    
            
            
                | 
                    9
                 | 
                                    
                                                     | 
                
                 | 
                # (at your option) any later version.  | 
            
            
                                                        
            
                                    
            
            
                | 
                    10
                 | 
                                    
                                                     | 
                
                 | 
                #  | 
            
            
                                                        
            
                                    
            
            
                | 
                    11
                 | 
                                    
                                                     | 
                
                 | 
                # Abydos is distributed in the hope that it will be useful,  | 
            
            
                                                        
            
                                    
            
            
                | 
                    12
                 | 
                                    
                                                     | 
                
                 | 
                # but WITHOUT ANY WARRANTY; without even the implied warranty of  | 
            
            
                                                        
            
                                    
            
            
                | 
                    13
                 | 
                                    
                                                     | 
                
                 | 
                # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the  | 
            
            
                                                        
            
                                    
            
            
                | 
                    14
                 | 
                                    
                                                     | 
                
                 | 
                # GNU General Public License for more details.  | 
            
            
                                                        
            
                                    
            
            
                | 
                    15
                 | 
                                    
                                                     | 
                
                 | 
                #  | 
            
            
                                                        
            
                                    
            
            
                | 
                    16
                 | 
                                    
                                                     | 
                
                 | 
                # You should have received a copy of the GNU General Public License  | 
            
            
                                                        
            
                                    
            
            
                | 
                    17
                 | 
                                    
                                                     | 
                
                 | 
                # along with Abydos. If not, see <http://www.gnu.org/licenses/>.  | 
            
            
                                                        
            
                                    
            
            
                | 
                    18
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    19
                 | 
                                    
                             1                          | 
                
                 | 
                """abydos.distance._strcmp95.  | 
            
            
                                                        
            
                                    
            
            
                | 
                    20
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    21
                 | 
                                    
                                                     | 
                
                 | 
                The strcmp95 algorithm variant of Jaro-Winkler distance  | 
            
            
                                                        
            
                                    
            
            
                | 
                    22
                 | 
                                    
                                                     | 
                
                 | 
                """  | 
            
            
                                                        
            
                                    
            
            
                | 
                    23
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    24
                 | 
                                    
                             1                          | 
                
                 | 
                from __future__ import (  | 
            
            
                                                        
            
                                    
            
            
                | 
                    25
                 | 
                                    
                                                     | 
                
                 | 
                    absolute_import,  | 
            
            
                                                        
            
                                    
            
            
                | 
                    26
                 | 
                                    
                                                     | 
                
                 | 
                    division,  | 
            
            
                                                        
            
                                    
            
            
                | 
                    27
                 | 
                                    
                                                     | 
                
                 | 
                    print_function,  | 
            
            
                                                        
            
                                    
            
            
                | 
                    28
                 | 
                                    
                                                     | 
                
                 | 
                    unicode_literals,  | 
            
            
                                                        
            
                                    
            
            
                | 
                    29
                 | 
                                    
                                                     | 
                
                 | 
                )  | 
            
            
                                                        
            
                                    
            
            
                | 
                    30
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    31
                 | 
                                    
                             1                          | 
                
                 | 
                from collections import defaultdict  | 
            
            
                                                        
            
                                    
            
            
                | 
                    32
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    33
                 | 
                                    
                             1                          | 
                
                 | 
                from six.moves import range  | 
            
            
                                                        
            
                                    
            
            
                | 
                    34
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    35
                 | 
                                    
                             1                          | 
                
                 | 
                from ._distance import _Distance  | 
            
            
                                                        
            
                                    
            
            
                | 
                    36
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    37
                 | 
                                    
                             1                          | 
                
                 | 
                __all__ = ['Strcmp95', 'dist_strcmp95', 'sim_strcmp95']  | 
            
            
                                                        
            
                                    
            
            
                | 
                    38
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    39
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    40
                 | 
                                    
                             1                          | 
                
                 | 
                class Strcmp95(_Distance):  | 
            
                            
                    | 
                        
                     | 
                     | 
                     | 
                    
                                                                                                    
                        
                         
                                                                                        
                                                                                     
                     | 
                
            
                                                        
            
                                    
            
            
                | 
                    41
                 | 
                                    
                                                     | 
                
                 | 
                    """Strcmp95.  | 
            
            
                                                        
            
                                    
            
            
                | 
                    42
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    43
                 | 
                                    
                                                     | 
                
                 | 
                    This is a Python translation of the C code for strcmp95:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    44
                 | 
                                    
                                                     | 
                
                 | 
                    http://web.archive.org/web/20110629121242/http://www.census.gov/geo/msb/stand/strcmp.c  | 
            
            
                                                        
            
                                    
            
            
                | 
                    45
                 | 
                                    
                                                     | 
                
                 | 
                    :cite:`Winkler:1994`.  | 
            
            
                                                        
            
                                    
            
            
                | 
                    46
                 | 
                                    
                                                     | 
                
                 | 
                    The above file is a US Government publication and, accordingly,  | 
            
            
                                                        
            
                                    
            
            
                | 
                    47
                 | 
                                    
                                                     | 
                
                 | 
                    in the public domain.  | 
            
            
                                                        
            
                                    
            
            
                | 
                    48
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    49
                 | 
                                    
                                                     | 
                
                 | 
                    This is based on the Jaro-Winkler distance, but also attempts to correct  | 
            
            
                                                        
            
                                    
            
            
                | 
                    50
                 | 
                                    
                                                     | 
                
                 | 
                    for some common typos and frequently confused characters. It is also  | 
            
            
                                                        
            
                                    
            
            
                | 
                    51
                 | 
                                    
                                                     | 
                
                 | 
                    limited to uppercase ASCII characters, so it is appropriate to American  | 
            
            
                                                        
            
                                    
            
            
                | 
                    52
                 | 
                                    
                                                     | 
                
                 | 
                    names, but not much else.  | 
            
            
                                                        
            
                                    
            
            
                | 
                    53
                 | 
                                    
                                                     | 
                
                 | 
                    """  | 
            
            
                                                        
            
                                    
            
            
                | 
                    54
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    55
                 | 
                                    
                             1                          | 
                
                 | 
                    _sp_mx = (  | 
            
            
                                                        
            
                                    
            
            
                | 
                    56
                 | 
                                    
                                                     | 
                
                 | 
                        ('A', 'E'), | 
            
            
                                                        
            
                                    
            
            
                | 
                    57
                 | 
                                    
                                                     | 
                
                 | 
                        ('A', 'I'), | 
            
            
                                                        
            
                                    
            
            
                | 
                    58
                 | 
                                    
                                                     | 
                
                 | 
                        ('A', 'O'), | 
            
            
                                                        
            
                                    
            
            
                | 
                    59
                 | 
                                    
                                                     | 
                
                 | 
                        ('A', 'U'), | 
            
            
                                                        
            
                                    
            
            
                | 
                    60
                 | 
                                    
                                                     | 
                
                 | 
                        ('B', 'V'), | 
            
            
                                                        
            
                                    
            
            
                | 
                    61
                 | 
                                    
                                                     | 
                
                 | 
                        ('E', 'I'), | 
            
            
                                                        
            
                                    
            
            
                | 
                    62
                 | 
                                    
                                                     | 
                
                 | 
                        ('E', 'O'), | 
            
            
                                                        
            
                                    
            
            
                | 
                    63
                 | 
                                    
                                                     | 
                
                 | 
                        ('E', 'U'), | 
            
            
                                                        
            
                                    
            
            
                | 
                    64
                 | 
                                    
                                                     | 
                
                 | 
                        ('I', 'O'), | 
            
            
                                                        
            
                                    
            
            
                | 
                    65
                 | 
                                    
                                                     | 
                
                 | 
                        ('I', 'U'), | 
            
            
                                                        
            
                                    
            
            
                | 
                    66
                 | 
                                    
                                                     | 
                
                 | 
                        ('O', 'U'), | 
            
            
                                                        
            
                                    
            
            
                | 
                    67
                 | 
                                    
                                                     | 
                
                 | 
                        ('I', 'Y'), | 
            
            
                                                        
            
                                    
            
            
                | 
                    68
                 | 
                                    
                                                     | 
                
                 | 
                        ('E', 'Y'), | 
            
            
                                                        
            
                                    
            
            
                | 
                    69
                 | 
                                    
                                                     | 
                
                 | 
                        ('C', 'G'), | 
            
            
                                                        
            
                                    
            
            
                | 
                    70
                 | 
                                    
                                                     | 
                
                 | 
                        ('E', 'F'), | 
            
            
                                                        
            
                                    
            
            
                | 
                    71
                 | 
                                    
                                                     | 
                
                 | 
                        ('W', 'U'), | 
            
            
                                                        
            
                                    
            
            
                | 
                    72
                 | 
                                    
                                                     | 
                
                 | 
                        ('W', 'V'), | 
            
            
                                                        
            
                                    
            
            
                | 
                    73
                 | 
                                    
                                                     | 
                
                 | 
                        ('X', 'K'), | 
            
            
                                                        
            
                                    
            
            
                | 
                    74
                 | 
                                    
                                                     | 
                
                 | 
                        ('S', 'Z'), | 
            
            
                                                        
            
                                    
            
            
                | 
                    75
                 | 
                                    
                                                     | 
                
                 | 
                        ('X', 'S'), | 
            
            
                                                        
            
                                    
            
            
                | 
                    76
                 | 
                                    
                                                     | 
                
                 | 
                        ('Q', 'C'), | 
            
            
                                                        
            
                                    
            
            
                | 
                    77
                 | 
                                    
                                                     | 
                
                 | 
                        ('U', 'V'), | 
            
            
                                                        
            
                                    
            
            
                | 
                    78
                 | 
                                    
                                                     | 
                
                 | 
                        ('M', 'N'), | 
            
            
                                                        
            
                                    
            
            
                | 
                    79
                 | 
                                    
                                                     | 
                
                 | 
                        ('L', 'I'), | 
            
            
                                                        
            
                                    
            
            
                | 
                    80
                 | 
                                    
                                                     | 
                
                 | 
                        ('Q', 'O'), | 
            
            
                                                        
            
                                    
            
            
                | 
                    81
                 | 
                                    
                                                     | 
                
                 | 
                        ('P', 'R'), | 
            
            
                                                        
            
                                    
            
            
                | 
                    82
                 | 
                                    
                                                     | 
                
                 | 
                        ('I', 'J'), | 
            
            
                                                        
            
                                    
            
            
                | 
                    83
                 | 
                                    
                                                     | 
                
                 | 
                        ('2', 'Z'), | 
            
            
                                                        
            
                                    
            
            
                | 
                    84
                 | 
                                    
                                                     | 
                
                 | 
                        ('5', 'S'), | 
            
            
                                                        
            
                                    
            
            
                | 
                    85
                 | 
                                    
                                                     | 
                
                 | 
                        ('8', 'B'), | 
            
            
                                                        
            
                                    
            
            
                | 
                    86
                 | 
                                    
                                                     | 
                
                 | 
                        ('1', 'I'), | 
            
            
                                                        
            
                                    
            
            
                | 
                    87
                 | 
                                    
                                                     | 
                
                 | 
                        ('1', 'L'), | 
            
            
                                                        
            
                                    
            
            
                | 
                    88
                 | 
                                    
                                                     | 
                
                 | 
                        ('0', 'O'), | 
            
            
                                                        
            
                                    
            
            
                | 
                    89
                 | 
                                    
                                                     | 
                
                 | 
                        ('0', 'Q'), | 
            
            
                                                        
            
                                    
            
            
                | 
                    90
                 | 
                                    
                                                     | 
                
                 | 
                        ('C', 'K'), | 
            
            
                                                        
            
                                    
            
            
                | 
                    91
                 | 
                                    
                                                     | 
                
                 | 
                        ('G', 'J'), | 
            
            
                                                        
            
                                    
            
            
                | 
                    92
                 | 
                                    
                                                     | 
                
                 | 
                    )  | 
            
            
                                                        
            
                                    
            
            
                | 
                    93
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    94
                 | 
                                    
                             1                          | 
                
                 | 
                    def sim(self, src, tar, long_strings=False):  | 
            
                            
                    | 
                        
                     | 
                     | 
                     | 
                    
                                                                                                                                                        
                        
                         
                                                                                        
                                                                                            
                                                                                     
                     | 
                
            
                                                        
            
                                    
            
            
                | 
                    95
                 | 
                                    
                                                     | 
                
                 | 
                        """Return the strcmp95 similarity of two strings.  | 
            
            
                                                        
            
                                    
            
            
                | 
                    96
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    97
                 | 
                                    
                                                     | 
                
                 | 
                        Parameters  | 
            
            
                                                        
            
                                    
            
            
                | 
                    98
                 | 
                                    
                                                     | 
                
                 | 
                        ----------  | 
            
            
                                                        
            
                                    
            
            
                | 
                    99
                 | 
                                    
                                                     | 
                
                 | 
                        src : str  | 
            
            
                                                        
            
                                    
            
            
                | 
                    100
                 | 
                                    
                                                     | 
                
                 | 
                            Source string for comparison  | 
            
            
                                                        
            
                                    
            
            
                | 
                    101
                 | 
                                    
                                                     | 
                
                 | 
                        tar : str  | 
            
            
                                                        
            
                                    
            
            
                | 
                    102
                 | 
                                    
                                                     | 
                
                 | 
                            Target string for comparison  | 
            
            
                                                        
            
                                    
            
            
                | 
                    103
                 | 
                                    
                                                     | 
                
                 | 
                        long_strings : bool  | 
            
            
                                                        
            
                                    
            
            
                | 
                    104
                 | 
                                    
                                                     | 
                
                 | 
                            Set to True to increase the probability of a match when the number  | 
            
            
                                                        
            
                                    
            
            
                | 
                    105
                 | 
                                    
                                                     | 
                
                 | 
                            of matched characters is large. This option allows for a little  | 
            
            
                                                        
            
                                    
            
            
                | 
                    106
                 | 
                                    
                                                     | 
                
                 | 
                            more tolerance when the strings are large. It is not an appropriate  | 
            
            
                                                        
            
                                    
            
            
                | 
                    107
                 | 
                                    
                                                     | 
                
                 | 
                            test when comparing fixed length fields such as phone and social  | 
            
            
                                                        
            
                                    
            
            
                | 
                    108
                 | 
                                    
                                                     | 
                
                 | 
                            security numbers.  | 
            
            
                                                        
            
                                    
            
            
                | 
                    109
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    110
                 | 
                                    
                                                     | 
                
                 | 
                        Returns  | 
            
            
                                                        
            
                                    
            
            
                | 
                    111
                 | 
                                    
                                                     | 
                
                 | 
                        -------  | 
            
            
                                                        
            
                                    
            
            
                | 
                    112
                 | 
                                    
                                                     | 
                
                 | 
                        float  | 
            
            
                                                        
            
                                    
            
            
                | 
                    113
                 | 
                                    
                                                     | 
                
                 | 
                            Strcmp95 similarity  | 
            
            
                                                        
            
                                    
            
            
                | 
                    114
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    115
                 | 
                                    
                                                     | 
                
                 | 
                        Examples  | 
            
            
                                                        
            
                                    
            
            
                | 
                    116
                 | 
                                    
                                                     | 
                
                 | 
                        --------  | 
            
            
                                                        
            
                                    
            
            
                | 
                    117
                 | 
                                    
                                                     | 
                
                 | 
                        >>> cmp = Strcmp95()  | 
            
            
                                                        
            
                                    
            
            
                | 
                    118
                 | 
                                    
                                                     | 
                
                 | 
                        >>> cmp.sim('cat', 'hat') | 
            
            
                                                        
            
                                    
            
            
                | 
                    119
                 | 
                                    
                                                     | 
                
                 | 
                        0.7777777777777777  | 
            
            
                                                        
            
                                    
            
            
                | 
                    120
                 | 
                                    
                                                     | 
                
                 | 
                        >>> cmp.sim('Niall', 'Neil') | 
            
            
                                                        
            
                                    
            
            
                | 
                    121
                 | 
                                    
                                                     | 
                
                 | 
                        0.8454999999999999  | 
            
            
                                                        
            
                                    
            
            
                | 
                    122
                 | 
                                    
                                                     | 
                
                 | 
                        >>> cmp.sim('aluminum', 'Catalan') | 
            
            
                                                        
            
                                    
            
            
                | 
                    123
                 | 
                                    
                                                     | 
                
                 | 
                        0.6547619047619048  | 
            
            
                                                        
            
                                    
            
            
                | 
                    124
                 | 
                                    
                                                     | 
                
                 | 
                        >>> cmp.sim('ATCG', 'TAGC') | 
            
            
                                                        
            
                                    
            
            
                | 
                    125
                 | 
                                    
                                                     | 
                
                 | 
                        0.8333333333333334  | 
            
            
                                                        
            
                                    
            
            
                | 
                    126
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    127
                 | 
                                    
                                                     | 
                
                 | 
                        """  | 
            
            
                                                        
            
                                    
            
            
                | 
                    128
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    129
                 | 
                                    
                             1                          | 
                
                 | 
                        def _in_range(char):  | 
            
            
                                                        
            
                                    
            
            
                | 
                    130
                 | 
                                    
                                                     | 
                
                 | 
                            """Return True if char is in the range (0, 91).  | 
            
            
                                                        
            
                                    
            
            
                | 
                    131
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    132
                 | 
                                    
                                                     | 
                
                 | 
                            Parameters  | 
            
            
                                                        
            
                                    
            
            
                | 
                    133
                 | 
                                    
                                                     | 
                
                 | 
                            ----------  | 
            
            
                                                        
            
                                    
            
            
                | 
                    134
                 | 
                                    
                                                     | 
                
                 | 
                            char : str  | 
            
            
                                                        
            
                                    
            
            
                | 
                    135
                 | 
                                    
                                                     | 
                
                 | 
                                The character to check  | 
            
            
                                                        
            
                                    
            
            
                | 
                    136
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    137
                 | 
                                    
                                                     | 
                
                 | 
                            Returns  | 
            
            
                                                        
            
                                    
            
            
                | 
                    138
                 | 
                                    
                                                     | 
                
                 | 
                            -------  | 
            
            
                                                        
            
                                    
            
            
                | 
                    139
                 | 
                                    
                                                     | 
                
                 | 
                            bool  | 
            
            
                                                        
            
                                    
            
            
                | 
                    140
                 | 
                                    
                                                     | 
                
                 | 
                                True if char is in the range (0, 91)  | 
            
            
                                                        
            
                                    
            
            
                | 
                    141
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    142
                 | 
                                    
                                                     | 
                
                 | 
                            """  | 
            
            
                                                        
            
                                    
            
            
                | 
                    143
                 | 
                                    
                             1                          | 
                
                 | 
                            return 91 > ord(char) > 0  | 
            
            
                                                        
            
                                    
            
            
                | 
                    144
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    145
                 | 
                                    
                             1                          | 
                
                 | 
                        ying = src.strip().upper()  | 
            
            
                                                        
            
                                    
            
            
                | 
                    146
                 | 
                                    
                             1                          | 
                
                 | 
                        yang = tar.strip().upper()  | 
            
            
                                                        
            
                                    
            
            
                | 
                    147
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    148
                 | 
                                    
                             1                          | 
                
                 | 
                        if ying == yang:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    149
                 | 
                                    
                             1                          | 
                
                 | 
                            return 1.0  | 
            
            
                                                        
            
                                    
            
            
                | 
                    150
                 | 
                                    
                                                     | 
                
                 | 
                        # If either string is blank - return - added in Version 2  | 
            
            
                                                        
            
                                    
            
            
                | 
                    151
                 | 
                                    
                             1                          | 
                
                 | 
                        if not ying or not yang:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    152
                 | 
                                    
                             1                          | 
                
                 | 
                            return 0.0  | 
            
            
                                                        
            
                                    
            
            
                | 
                    153
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    154
                 | 
                                    
                             1                          | 
                
                 | 
                        adjwt = defaultdict(int)  | 
            
            
                                                        
            
                                    
            
            
                | 
                    155
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    156
                 | 
                                    
                                                     | 
                
                 | 
                        # Initialize the adjwt array on the first call to the function only.  | 
            
            
                                                        
            
                                    
            
            
                | 
                    157
                 | 
                                    
                                                     | 
                
                 | 
                        # The adjwt array is used to give partial credit for characters that  | 
            
            
                                                        
            
                                    
            
            
                | 
                    158
                 | 
                                    
                                                     | 
                
                 | 
                        # may be errors due to known phonetic or character recognition errors.  | 
            
            
                                                        
            
                                    
            
            
                | 
                    159
                 | 
                                    
                                                     | 
                
                 | 
                        # A typical example is to match the letter "O" with the number "0"  | 
            
            
                                                        
            
                                    
            
            
                | 
                    160
                 | 
                                    
                             1                          | 
                
                 | 
                        for i in self._sp_mx:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    161
                 | 
                                    
                             1                          | 
                
                 | 
                            adjwt[(i[0], i[1])] = 3  | 
            
            
                                                        
            
                                    
            
            
                | 
                    162
                 | 
                                    
                             1                          | 
                
                 | 
                            adjwt[(i[1], i[0])] = 3  | 
            
            
                                                        
            
                                    
            
            
                | 
                    163
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    164
                 | 
                                    
                             1                          | 
                
                 | 
                        if len(ying) > len(yang):  | 
            
            
                                                        
            
                                    
            
            
                | 
                    165
                 | 
                                    
                             1                          | 
                
                 | 
                            search_range = len(ying)  | 
            
            
                                                        
            
                                    
            
            
                | 
                    166
                 | 
                                    
                             1                          | 
                
                 | 
                            minv = len(yang)  | 
            
            
                                                        
            
                                    
            
            
                | 
                    167
                 | 
                                    
                                                     | 
                
                 | 
                        else:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    168
                 | 
                                    
                             1                          | 
                
                 | 
                            search_range = len(yang)  | 
            
            
                                                        
            
                                    
            
            
                | 
                    169
                 | 
                                    
                             1                          | 
                
                 | 
                            minv = len(ying)  | 
            
            
                                                        
            
                                    
            
            
                | 
                    170
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    171
                 | 
                                    
                                                     | 
                
                 | 
                        # Blank out the flags  | 
            
            
                                                        
            
                                    
            
            
                | 
                    172
                 | 
                                    
                             1                          | 
                
                 | 
                        ying_flag = [0] * search_range  | 
            
            
                                                        
            
                                    
            
            
                | 
                    173
                 | 
                                    
                             1                          | 
                
                 | 
                        yang_flag = [0] * search_range  | 
            
            
                                                        
            
                                    
            
            
                | 
                    174
                 | 
                                    
                             1                          | 
                
                 | 
                        search_range = max(0, search_range // 2 - 1)  | 
            
            
                                                        
            
                                    
            
            
                | 
                    175
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    176
                 | 
                                    
                                                     | 
                
                 | 
                        # Looking only within the search range,  | 
            
            
                                                        
            
                                    
            
            
                | 
                    177
                 | 
                                    
                                                     | 
                
                 | 
                        # count and flag the matched pairs.  | 
            
            
                                                        
            
                                    
            
            
                | 
                    178
                 | 
                                    
                             1                          | 
                
                 | 
                        num_com = 0  | 
            
            
                                                        
            
                                    
            
            
                | 
                    179
                 | 
                                    
                             1                          | 
                
                 | 
                        yl1 = len(yang) - 1  | 
            
            
                                                        
            
                                    
            
            
                | 
                    180
                 | 
                                    
                             1                          | 
                
                 | 
                        for i in range(len(ying)):  | 
            
                            
                    | 
                        
                     | 
                     | 
                     | 
                    
                                                                                                    
                        
                         
                                                                                        
                                                                                     
                     | 
                
            
                                                        
            
                                    
            
            
                | 
                    181
                 | 
                                    
                             1                          | 
                
                 | 
                            low_lim = (i - search_range) if (i >= search_range) else 0  | 
            
            
                                                        
            
                                    
            
            
                | 
                    182
                 | 
                                    
                             1                          | 
                
                 | 
                            hi_lim = (i + search_range) if ((i + search_range) <= yl1) else yl1  | 
            
            
                                                        
            
                                    
            
            
                | 
                    183
                 | 
                                    
                             1                          | 
                
                 | 
                            for j in range(low_lim, hi_lim + 1):  | 
            
            
                                                        
            
                                    
            
            
                | 
                    184
                 | 
                                    
                             1                          | 
                
                 | 
                                if (yang_flag[j] == 0) and (yang[j] == ying[i]):  | 
            
            
                                                        
            
                                    
            
            
                | 
                    185
                 | 
                                    
                             1                          | 
                
                 | 
                                    yang_flag[j] = 1  | 
            
            
                                                        
            
                                    
            
            
                | 
                    186
                 | 
                                    
                             1                          | 
                
                 | 
                                    ying_flag[i] = 1  | 
            
            
                                                        
            
                                    
            
            
                | 
                    187
                 | 
                                    
                             1                          | 
                
                 | 
                                    num_com += 1  | 
            
            
                                                        
            
                                    
            
            
                | 
                    188
                 | 
                                    
                             1                          | 
                
                 | 
                                    break  | 
            
            
                                                        
            
                                    
            
            
                | 
                    189
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    190
                 | 
                                    
                                                     | 
                
                 | 
                        # If no characters in common - return  | 
            
            
                                                        
            
                                    
            
            
                | 
                    191
                 | 
                                    
                             1                          | 
                
                 | 
                        if num_com == 0:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    192
                 | 
                                    
                             1                          | 
                
                 | 
                            return 0.0  | 
            
            
                                                        
            
                                    
            
            
                | 
                    193
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    194
                 | 
                                    
                                                     | 
                
                 | 
                        # Count the number of transpositions  | 
            
            
                                                        
            
                                    
            
            
                | 
                    195
                 | 
                                    
                             1                          | 
                
                 | 
                        k = n_trans = 0  | 
            
            
                                                        
            
                                    
            
            
                | 
                    196
                 | 
                                    
                             1                          | 
                
                 | 
                        for i in range(len(ying)):  | 
            
                            
                    | 
                        
                     | 
                     | 
                     | 
                    
                                                                                                    
                        
                         
                                                                                        
                                                                                     
                     | 
                
            
                                                        
            
                                    
            
            
                | 
                    197
                 | 
                                    
                             1                          | 
                
                 | 
                            if ying_flag[i] != 0:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    198
                 | 
                                    
                             1                          | 
                
                 | 
                                j = 0  | 
            
            
                                                        
            
                                    
            
            
                | 
                    199
                 | 
                                    
                             1                          | 
                
                 | 
                                for j in range(k, len(yang)):  # pragma: no branch  | 
            
            
                                                        
            
                                    
            
            
                | 
                    200
                 | 
                                    
                             1                          | 
                
                 | 
                                    if yang_flag[j] != 0:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    201
                 | 
                                    
                             1                          | 
                
                 | 
                                        k = j + 1  | 
            
            
                                                        
            
                                    
            
            
                | 
                    202
                 | 
                                    
                             1                          | 
                
                 | 
                                        break  | 
            
            
                                                        
            
                                    
            
            
                | 
                    203
                 | 
                                    
                             1                          | 
                
                 | 
                                if ying[i] != yang[j]:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    204
                 | 
                                    
                             1                          | 
                
                 | 
                                    n_trans += 1  | 
            
            
                                                        
            
                                    
            
            
                | 
                    205
                 | 
                                    
                             1                          | 
                
                 | 
                        n_trans //= 2  | 
            
            
                                                        
            
                                    
            
            
                | 
                    206
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    207
                 | 
                                    
                                                     | 
                
                 | 
                        # Adjust for similarities in unmatched characters  | 
            
            
                                                        
            
                                    
            
            
                | 
                    208
                 | 
                                    
                             1                          | 
                
                 | 
                        n_simi = 0  | 
            
            
                                                        
            
                                    
            
            
                | 
                    209
                 | 
                                    
                             1                          | 
                
                 | 
                        if minv > num_com:  | 
            
                            
                    | 
                        
                     | 
                     | 
                     | 
                    
                                                                                                    
                        
                         
                                                                                        
                                                                                     
                     | 
                
            
                                                        
            
                                    
            
            
                | 
                    210
                 | 
                                    
                             1                          | 
                
                 | 
                            for i in range(len(ying)):  | 
            
                            
                    | 
                        
                     | 
                     | 
                     | 
                    
                                                                                                    
                        
                         
                                                                                        
                                                                                     
                     | 
                
            
                                                        
            
                                    
            
            
                | 
                    211
                 | 
                                    
                             1                          | 
                
                 | 
                                if ying_flag[i] == 0 and _in_range(ying[i]):  | 
            
            
                                                        
            
                                    
            
            
                | 
                    212
                 | 
                                    
                             1                          | 
                
                 | 
                                    for j in range(len(yang)):  | 
            
                            
                    | 
                        
                     | 
                     | 
                     | 
                    
                                                                                                    
                        
                         
                                                                                        
                                                                                     
                     | 
                
            
                                                        
            
                                    
            
            
                | 
                    213
                 | 
                                    
                             1                          | 
                
                 | 
                                        if yang_flag[j] == 0 and _in_range(yang[j]):  | 
            
            
                                                        
            
                                    
            
            
                | 
                    214
                 | 
                                    
                             1                          | 
                
                 | 
                                            if (ying[i], yang[j]) in adjwt:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    215
                 | 
                                    
                             1                          | 
                
                 | 
                                                n_simi += adjwt[(ying[i], yang[j])]  | 
            
            
                                                        
            
                                    
            
            
                | 
                    216
                 | 
                                    
                             1                          | 
                
                 | 
                                                yang_flag[j] = 2  | 
            
            
                                                        
            
                                    
            
            
                | 
                    217
                 | 
                                    
                             1                          | 
                
                 | 
                                                break  | 
            
            
                                                        
            
                                    
            
            
                | 
                    218
                 | 
                                    
                             1                          | 
                
                 | 
                        num_sim = n_simi / 10.0 + num_com  | 
            
            
                                                        
            
                                    
            
            
                | 
                    219
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    220
                 | 
                                    
                                                     | 
                
                 | 
                        # Main weight computation  | 
            
            
                                                        
            
                                    
            
            
                | 
                    221
                 | 
                                    
                             1                          | 
                
                 | 
                        weight = (  | 
            
            
                                                        
            
                                    
            
            
                | 
                    222
                 | 
                                    
                                                     | 
                
                 | 
                            num_sim / len(ying)  | 
            
            
                                                        
            
                                    
            
            
                | 
                    223
                 | 
                                    
                                                     | 
                
                 | 
                            + num_sim / len(yang)  | 
            
            
                                                        
            
                                    
            
            
                | 
                    224
                 | 
                                    
                                                     | 
                
                 | 
                            + (num_com - n_trans) / num_com  | 
            
            
                                                        
            
                                    
            
            
                | 
                    225
                 | 
                                    
                                                     | 
                
                 | 
                        )  | 
            
            
                                                        
            
                                    
            
            
                | 
                    226
                 | 
                                    
                             1                          | 
                
                 | 
                        weight /= 3.0  | 
            
            
                                                        
            
                                    
            
            
                | 
                    227
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    228
                 | 
                                    
                                                     | 
                
                 | 
                        # Continue to boost the weight if the strings are similar  | 
            
            
                                                        
            
                                    
            
            
                | 
                    229
                 | 
                                    
                             1                          | 
                
                 | 
                        if weight > 0.7:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    230
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    231
                 | 
                                    
                                                     | 
                
                 | 
                            # Adjust for having up to the first 4 characters in common  | 
            
            
                                                        
            
                                    
            
            
                | 
                    232
                 | 
                                    
                             1                          | 
                
                 | 
                            j = 4 if (minv >= 4) else minv  | 
            
            
                                                        
            
                                    
            
            
                | 
                    233
                 | 
                                    
                             1                          | 
                
                 | 
                            i = 0  | 
            
            
                                                        
            
                                    
            
            
                | 
                    234
                 | 
                                    
                             1                          | 
                
                 | 
                            while (i < j) and (ying[i] == yang[i]) and (not ying[i].isdigit()):  | 
            
            
                                                        
            
                                    
            
            
                | 
                    235
                 | 
                                    
                             1                          | 
                
                 | 
                                i += 1  | 
            
            
                                                        
            
                                    
            
            
                | 
                    236
                 | 
                                    
                             1                          | 
                
                 | 
                            if i:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    237
                 | 
                                    
                             1                          | 
                
                 | 
                                weight += i * 0.1 * (1.0 - weight)  | 
            
            
                                                        
            
                                    
            
            
                | 
                    238
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    239
                 | 
                                    
                                                     | 
                
                 | 
                            # Optionally adjust for long strings.  | 
            
            
                                                        
            
                                    
            
            
                | 
                    240
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    241
                 | 
                                    
                                                     | 
                
                 | 
                            # After agreeing beginning chars, at least two more must agree and  | 
            
            
                                                        
            
                                    
            
            
                | 
                    242
                 | 
                                    
                                                     | 
                
                 | 
                            # the agreeing characters must be > .5 of remaining characters.  | 
            
            
                                                        
            
                                    
            
            
                | 
                    243
                 | 
                                    
                             1                          | 
                
                 | 
                            if (  | 
            
            
                                                        
            
                                    
            
            
                | 
                    244
                 | 
                                    
                                                     | 
                
                 | 
                                long_strings  | 
            
                            
                    | 
                        
                     | 
                     | 
                     | 
                    
                                                                                                    
                        
                         
                                                                                        
                                                                                     
                     | 
                
            
                                                        
            
                                    
            
            
                | 
                    245
                 | 
                                    
                                                     | 
                
                 | 
                                and (minv > 4)  | 
            
                            
                    | 
                        
                     | 
                     | 
                     | 
                    
                                                                                                    
                        
                         
                                                                                        
                                                                                     
                     | 
                
            
                                                        
            
                                    
            
            
                | 
                    246
                 | 
                                    
                                                     | 
                
                 | 
                                and (num_com > i + 1)  | 
            
                            
                    | 
                        
                     | 
                     | 
                     | 
                    
                                                                                                    
                        
                         
                                                                                        
                                                                                     
                     | 
                
            
                                                        
            
                                    
            
            
                | 
                    247
                 | 
                                    
                                                     | 
                
                 | 
                                and (2 * num_com >= minv + i)  | 
            
                            
                    | 
                        
                     | 
                     | 
                     | 
                    
                                                                                                    
                        
                         
                                                                                        
                                                                                     
                     | 
                
            
                                                        
            
                                    
            
            
                | 
                    248
                 | 
                                    
                                                     | 
                
                 | 
                            ):  | 
            
            
                                                        
            
                                    
            
            
                | 
                    249
                 | 
                                    
                             1                          | 
                
                 | 
                                if not ying[0].isdigit():  | 
            
            
                                                        
            
                                    
            
            
                | 
                    250
                 | 
                                    
                             1                          | 
                
                 | 
                                    weight += (1.0 - weight) * (  | 
            
            
                                                        
            
                                    
            
            
                | 
                    251
                 | 
                                    
                                                     | 
                
                 | 
                                        (num_com - i - 1) / (len(ying) + len(yang) - i * 2 + 2)  | 
            
            
                                                        
            
                                    
            
            
                | 
                    252
                 | 
                                    
                                                     | 
                
                 | 
                                    )  | 
            
            
                                                        
            
                                    
            
            
                | 
                    253
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    254
                 | 
                                    
                             1                          | 
                
                 | 
                        return weight  | 
            
            
                                                        
            
                                    
            
            
                | 
                    255
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    256
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    257
                 | 
                                    
                             1                          | 
                
                 | 
                def sim_strcmp95(src, tar, long_strings=False):  | 
            
            
                                                        
            
                                    
            
            
                | 
                    258
                 | 
                                    
                                                     | 
                
                 | 
                    """Return the strcmp95 similarity of two strings.  | 
            
            
                                                        
            
                                    
            
            
                | 
                    259
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    260
                 | 
                                    
                                                     | 
                
                 | 
                    This is a wrapper for :py:meth:`Strcmp95.sim`.  | 
            
            
                                                        
            
                                    
            
            
                | 
                    261
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    262
                 | 
                                    
                                                     | 
                
                 | 
                    Parameters  | 
            
            
                                                        
            
                                    
            
            
                | 
                    263
                 | 
                                    
                                                     | 
                
                 | 
                    ----------  | 
            
            
                                                        
            
                                    
            
            
                | 
                    264
                 | 
                                    
                                                     | 
                
                 | 
                    src : str  | 
            
            
                                                        
            
                                    
            
            
                | 
                    265
                 | 
                                    
                                                     | 
                
                 | 
                        Source string for comparison  | 
            
            
                                                        
            
                                    
            
            
                | 
                    266
                 | 
                                    
                                                     | 
                
                 | 
                    tar : str  | 
            
            
                                                        
            
                                    
            
            
                | 
                    267
                 | 
                                    
                                                     | 
                
                 | 
                        Target string for comparison  | 
            
            
                                                        
            
                                    
            
            
                | 
                    268
                 | 
                                    
                                                     | 
                
                 | 
                    long_strings : bool  | 
            
            
                                                        
            
                                    
            
            
                | 
                    269
                 | 
                                    
                                                     | 
                
                 | 
                        Set to True to increase the probability of a match when the number of  | 
            
            
                                                        
            
                                    
            
            
                | 
                    270
                 | 
                                    
                                                     | 
                
                 | 
                        matched characters is large. This option allows for a little more  | 
            
            
                                                        
            
                                    
            
            
                | 
                    271
                 | 
                                    
                                                     | 
                
                 | 
                        tolerance when the strings are large. It is not an appropriate test  | 
            
            
                                                        
            
                                    
            
            
                | 
                    272
                 | 
                                    
                                                     | 
                
                 | 
                        when comparing fixed length fields such as phone and social security  | 
            
            
                                                        
            
                                    
            
            
                | 
                    273
                 | 
                                    
                                                     | 
                
                 | 
                        numbers.  | 
            
            
                                                        
            
                                    
            
            
                | 
                    274
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    275
                 | 
                                    
                                                     | 
                
                 | 
                    Returns  | 
            
            
                                                        
            
                                    
            
            
                | 
                    276
                 | 
                                    
                                                     | 
                
                 | 
                    -------  | 
            
            
                                                        
            
                                    
            
            
                | 
                    277
                 | 
                                    
                                                     | 
                
                 | 
                    float  | 
            
            
                                                        
            
                                    
            
            
                | 
                    278
                 | 
                                    
                                                     | 
                
                 | 
                        Strcmp95 similarity  | 
            
            
                                                        
            
                                    
            
            
                | 
                    279
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    280
                 | 
                                    
                                                     | 
                
                 | 
                    Examples  | 
            
            
                                                        
            
                                    
            
            
                | 
                    281
                 | 
                                    
                                                     | 
                
                 | 
                    --------  | 
            
            
                                                        
            
                                    
            
            
                | 
                    282
                 | 
                                    
                                                     | 
                
                 | 
                    >>> sim_strcmp95('cat', 'hat') | 
            
            
                                                        
            
                                    
            
            
                | 
                    283
                 | 
                                    
                                                     | 
                
                 | 
                    0.7777777777777777  | 
            
            
                                                        
            
                                    
            
            
                | 
                    284
                 | 
                                    
                                                     | 
                
                 | 
                    >>> sim_strcmp95('Niall', 'Neil') | 
            
            
                                                        
            
                                    
            
            
                | 
                    285
                 | 
                                    
                                                     | 
                
                 | 
                    0.8454999999999999  | 
            
            
                                                        
            
                                    
            
            
                | 
                    286
                 | 
                                    
                                                     | 
                
                 | 
                    >>> sim_strcmp95('aluminum', 'Catalan') | 
            
            
                                                        
            
                                    
            
            
                | 
                    287
                 | 
                                    
                                                     | 
                
                 | 
                    0.6547619047619048  | 
            
            
                                                        
            
                                    
            
            
                | 
                    288
                 | 
                                    
                                                     | 
                
                 | 
                    >>> sim_strcmp95('ATCG', 'TAGC') | 
            
            
                                                        
            
                                    
            
            
                | 
                    289
                 | 
                                    
                                                     | 
                
                 | 
                    0.8333333333333334  | 
            
            
                                                        
            
                                    
            
            
                | 
                    290
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    291
                 | 
                                    
                                                     | 
                
                 | 
                    """  | 
            
            
                                                        
            
                                    
            
            
                | 
                    292
                 | 
                                    
                             1                          | 
                
                 | 
                    return Strcmp95().sim(src, tar, long_strings)  | 
            
            
                                                        
            
                                    
            
            
                | 
                    293
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    294
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    295
                 | 
                                    
                             1                          | 
                
                 | 
                def dist_strcmp95(src, tar, long_strings=False):  | 
            
            
                                                        
            
                                    
            
            
                | 
                    296
                 | 
                                    
                                                     | 
                
                 | 
                    """Return the strcmp95 distance between two strings.  | 
            
            
                                                        
            
                                    
            
            
                | 
                    297
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    298
                 | 
                                    
                                                     | 
                
                 | 
                    This is a wrapper for :py:meth:`Strcmp95.dist`.  | 
            
            
                                                        
            
                                    
            
            
                | 
                    299
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    300
                 | 
                                    
                                                     | 
                
                 | 
                    Parameters  | 
            
            
                                                        
            
                                    
            
            
                | 
                    301
                 | 
                                    
                                                     | 
                
                 | 
                    ----------  | 
            
            
                                                        
            
                                    
            
            
                | 
                    302
                 | 
                                    
                                                     | 
                
                 | 
                    src : str  | 
            
            
                                                        
            
                                    
            
            
                | 
                    303
                 | 
                                    
                                                     | 
                
                 | 
                        Source string for comparison  | 
            
            
                                                        
            
                                    
            
            
                | 
                    304
                 | 
                                    
                                                     | 
                
                 | 
                    tar : str  | 
            
            
                                                        
            
                                    
            
            
                | 
                    305
                 | 
                                    
                                                     | 
                
                 | 
                        Target string for comparison  | 
            
            
                                                        
            
                                    
            
            
                | 
                    306
                 | 
                                    
                                                     | 
                
                 | 
                    long_strings : bool  | 
            
            
                                                        
            
                                    
            
            
                | 
                    307
                 | 
                                    
                                                     | 
                
                 | 
                        Set to True to increase the probability of a match when the number of  | 
            
            
                                                        
            
                                    
            
            
                | 
                    308
                 | 
                                    
                                                     | 
                
                 | 
                        matched characters is large. This option allows for a little more  | 
            
            
                                                        
            
                                    
            
            
                | 
                    309
                 | 
                                    
                                                     | 
                
                 | 
                        tolerance when the strings are large. It is not an appropriate test  | 
            
            
                                                        
            
                                    
            
            
                | 
                    310
                 | 
                                    
                                                     | 
                
                 | 
                        when comparing fixed length fields such as phone and social security  | 
            
            
                                                        
            
                                    
            
            
                | 
                    311
                 | 
                                    
                                                     | 
                
                 | 
                        numbers.  | 
            
            
                                                        
            
                                    
            
            
                | 
                    312
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    313
                 | 
                                    
                                                     | 
                
                 | 
                    Returns  | 
            
            
                                                        
            
                                    
            
            
                | 
                    314
                 | 
                                    
                                                     | 
                
                 | 
                    -------  | 
            
            
                                                        
            
                                    
            
            
                | 
                    315
                 | 
                                    
                                                     | 
                
                 | 
                    float  | 
            
            
                                                        
            
                                    
            
            
                | 
                    316
                 | 
                                    
                                                     | 
                
                 | 
                        Strcmp95 distance  | 
            
            
                                                        
            
                                    
            
            
                | 
                    317
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    318
                 | 
                                    
                                                     | 
                
                 | 
                    Examples  | 
            
            
                                                        
            
                                    
            
            
                | 
                    319
                 | 
                                    
                                                     | 
                
                 | 
                    --------  | 
            
            
                                                        
            
                                    
            
            
                | 
                    320
                 | 
                                    
                                                     | 
                
                 | 
                    >>> round(dist_strcmp95('cat', 'hat'), 12) | 
            
            
                                                        
            
                                    
            
            
                | 
                    321
                 | 
                                    
                                                     | 
                
                 | 
                    0.222222222222  | 
            
            
                                                        
            
                                    
            
            
                | 
                    322
                 | 
                                    
                                                     | 
                
                 | 
                    >>> round(dist_strcmp95('Niall', 'Neil'), 12) | 
            
            
                                                        
            
                                    
            
            
                | 
                    323
                 | 
                                    
                                                     | 
                
                 | 
                    0.1545  | 
            
            
                                                        
            
                                    
            
            
                | 
                    324
                 | 
                                    
                                                     | 
                
                 | 
                    >>> round(dist_strcmp95('aluminum', 'Catalan'), 12) | 
            
            
                                                        
            
                                    
            
            
                | 
                    325
                 | 
                                    
                                                     | 
                
                 | 
                    0.345238095238  | 
            
            
                                                        
            
                                    
            
            
                | 
                    326
                 | 
                                    
                                                     | 
                
                 | 
                    >>> round(dist_strcmp95('ATCG', 'TAGC'), 12) | 
            
            
                                                        
            
                                    
            
            
                | 
                    327
                 | 
                                    
                                                     | 
                
                 | 
                    0.166666666667  | 
            
            
                                                        
            
                                    
            
            
                | 
                    328
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    329
                 | 
                                    
                                                     | 
                
                 | 
                    """  | 
            
            
                                                        
            
                                    
            
            
                | 
                    330
                 | 
                                    
                             1                          | 
                
                 | 
                    return Strcmp95().dist(src, tar, long_strings)  | 
            
            
                                                        
            
                                    
            
            
                | 
                    331
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    332
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    333
                 | 
                                    
                                                     | 
                
                 | 
                if __name__ == '__main__':  | 
            
            
                                                        
            
                                    
            
            
                | 
                    334
                 | 
                                    
                                                     | 
                
                 | 
                    import doctest  | 
            
            
                                                        
            
                                    
            
            
                | 
                    335
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    336
                 | 
                                    
                                                     | 
                
                 | 
                    doctest.testmod()  | 
            
            
                                                        
            
                                    
            
            
                | 
                    337
                 | 
                                    
                                                     | 
                
                 | 
                 |