Passed
Push — master ( d2a11f...643512 )
by Chris
01:59 queued 12s
created

PhoneticDistance.dist()   A

Complexity

Conditions 3

Size

Total Lines 50
Code Lines 7

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 7
CRAP Score 3

Importance

Changes 0
Metric Value
eloc 7
dl 0
loc 50
ccs 7
cts 7
cp 1
rs 10
c 0
b 0
f 0
cc 3
nop 3
crap 3
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2019 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19 1
"""abydos.distance._phonetic_distance.
20
21
Phonetic distance.
22
"""
23
24 1
from __future__ import (
25
    absolute_import,
26
    division,
27
    print_function,
28
    unicode_literals,
29
)
30
31 1
from ._distance import _Distance
32 1
from ..fingerprint._fingerprint import _Fingerprint
33 1
from ..phonetic._phonetic import _Phonetic
34 1
from ..stemmer._stemmer import _Stemmer
35
36 1
__all__ = ['PhoneticDistance']
37
38
39 1
class PhoneticDistance(_Distance):
    """Phonetic distance.

    Phonetic distance applies one or more supplied string transformations to
    words and compares the resulting transformed strings using a supplied
    distance measure.

    A simple example would be to create a 'Soundex distance':

    >>> from abydos.phonetic import Soundex
    >>> soundex = PhoneticDistance(transforms=Soundex())
    >>> soundex.dist('Ashcraft', 'Ashcroft')
    0.0
    >>> soundex.dist('Robert', 'Ashcraft')
    1.0

    .. versionadded:: 0.4.1
    """

    def __init__(
        self, transforms=None, metric=None, encode_alpha=False, **kwargs
    ):
        """Initialize PhoneticDistance instance.

        Parameters
        ----------
        transforms : list or tuple or _Phonetic or _Stemmer or _Fingerprint
                     or type or callable
            A single transformation, or a list or tuple of transformations,
            to apply (in order) to each input word before computing their
            distance or similarity. Each transformation may be an instance
            of a subclass of _Phonetic, _Stemmer, or _Fingerprint, such a
            subclass itself (it is instantiated with no arguments), or any
            other callable taking one argument. If omitted, no
            transformations will be performed.
        metric : _Distance or type
            An instance of a subclass of _Distance (or such a subclass
            itself, which is instantiated with no arguments), used for
            computing the inputs' distance or similarity after being
            transformed. If omitted, the strings will be compared for
            identity (returning 0.0 if identical, otherwise 1.0, when
            distance is computed).
        encode_alpha : bool
            Set to true to use the encode_alpha method of phonetic algorithms
            instead of their encode method.
        **kwargs
            Arbitrary keyword arguments

        Raises
        ------
        TypeError
            If a transformation is neither a supported instance or class nor
            callable, or if metric is neither a _Distance instance nor a
            _Distance subclass.


        .. versionadded:: 0.4.1

        """
        super(PhoneticDistance, self).__init__(**kwargs)

        # Normalize the argument to a fresh list so the caller's own
        # sequence is never mutated; a lone transform becomes a 1-item list.
        if not transforms:
            candidates = []
        elif isinstance(transforms, (list, tuple)):
            candidates = list(transforms)
        else:
            candidates = [transforms]

        # After this point self.transforms holds only one-argument callables.
        self.transforms = [
            self._resolve_transform(trans, encode_alpha)
            for trans in candidates
        ]

        if metric:
            if isinstance(metric, type) and issubclass(metric, _Distance):
                # A class was supplied: build it with default arguments.
                metric = metric()
            elif not isinstance(metric, _Distance):
                raise TypeError(
                    '{} has unknown type {}'.format(metric, type(metric))
                )
        self.metric = metric

    @staticmethod
    def _resolve_transform(trans, encode_alpha):
        """Return the one-argument callable that a transform stands for.

        Parameters
        ----------
        trans : _Phonetic or _Stemmer or _Fingerprint or type or callable
            A single transformation as supplied to the constructor
        encode_alpha : bool
            Whether to select encode_alpha rather than encode for _Phonetic
            instances

        Returns
        -------
        callable
            The bound method (or the supplied callable) to apply to a word

        Raises
        ------
        TypeError
            If trans is neither a supported instance or class nor callable

        """
        supported = (_Phonetic, _Fingerprint, _Stemmer)

        if not isinstance(trans, supported):
            if isinstance(trans, type) and issubclass(trans, supported):
                # A class was supplied: build it with default arguments.
                trans = trans()
            elif callable(trans):
                # Arbitrary callables are used directly.
                return trans
            else:
                raise TypeError(
                    '{} has unknown type {}'.format(trans, type(trans))
                )

        # Pick the method that performs the transformation for each family.
        if isinstance(trans, _Phonetic):
            return trans.encode_alpha if encode_alpha else trans.encode
        if isinstance(trans, _Fingerprint):
            return trans.fingerprint
        if isinstance(trans, _Stemmer):
            return trans.stem
        return trans

    def _apply_transforms(self, src, tar):
        """Run both strings through every transformation, in order.

        Parameters
        ----------
        src : str
            Source string for comparison
        tar : str
            Target string for comparison

        Returns
        -------
        tuple
            The transformed (src, tar) pair

        """
        for trans in self.transforms:
            src = trans(src)
            tar = trans(tar)
        return src, tar

    def dist_abs(self, src, tar):
        """Return the Phonetic distance.

        Parameters
        ----------
        src : str
            Source string for comparison
        tar : str
            Target string for comparison

        Returns
        -------
        float or int
            The Phonetic distance: the metric's dist_abs of the transformed
            strings or, when no metric was supplied, 0 if the transformed
            strings are equal and 1 otherwise

        Examples
        --------
        >>> from abydos.phonetic import Soundex
        >>> cmp = PhoneticDistance(Soundex())
        >>> cmp.dist_abs('cat', 'hat')
        1
        >>> cmp.dist_abs('Niall', 'Neil')
        0
        >>> cmp.dist_abs('Colin', 'Cuilen')
        0
        >>> cmp.dist_abs('ATCG', 'TAGC')
        1

        >>> from abydos.distance import Levenshtein
        >>> cmp = PhoneticDistance(transforms=[Soundex], metric=Levenshtein)
        >>> cmp.dist_abs('cat', 'hat')
        1
        >>> cmp.dist_abs('Niall', 'Neil')
        0
        >>> cmp.dist_abs('Colin', 'Cuilen')
        0
        >>> cmp.dist_abs('ATCG', 'TAGC')
        3


        .. versionadded:: 0.4.1

        """
        src, tar = self._apply_transforms(src, tar)
        if self.metric:
            return self.metric.dist_abs(src, tar)
        return int(src != tar)

    def dist(self, src, tar):
        """Return the normalized Phonetic distance.

        Parameters
        ----------
        src : str
            Source string for comparison
        tar : str
            Target string for comparison

        Returns
        -------
        float
            The normalized Phonetic distance: the metric's dist of the
            transformed strings or, when no metric was supplied, 0.0 if the
            transformed strings are equal and 1.0 otherwise

        Examples
        --------
        >>> from abydos.phonetic import Soundex
        >>> cmp = PhoneticDistance(Soundex())
        >>> cmp.dist('cat', 'hat')
        1.0
        >>> cmp.dist('Niall', 'Neil')
        0.0
        >>> cmp.dist('Colin', 'Cuilen')
        0.0
        >>> cmp.dist('ATCG', 'TAGC')
        1.0

        >>> from abydos.distance import Levenshtein
        >>> cmp = PhoneticDistance(transforms=[Soundex], metric=Levenshtein)
        >>> cmp.dist('cat', 'hat')
        0.25
        >>> cmp.dist('Niall', 'Neil')
        0.0
        >>> cmp.dist('Colin', 'Cuilen')
        0.0
        >>> cmp.dist('ATCG', 'TAGC')
        0.75


        .. versionadded:: 0.4.1

        """
        src, tar = self._apply_transforms(src, tar)
        if self.metric:
            return self.metric.dist(src, tar)
        return float(src != tar)
235
236
237
if __name__ == '__main__':
    # Executed as a script: run the doctest examples embedded in this module.
    from doctest import testmod

    testmod()
241