abydos.distance._phonetic_distance.PhoneticDistance.dist() - Code Metrics - Inspection of "0.4.1" - chrislit/abydos - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Pull Request — master (#225)

by Chris

created 2019-07-12 04:59 UTC

PhoneticDistance.dist() A

↳ Parent: abydos.distance._phonetic_distance

Complexity

Conditions

Size

Total Lines	50
Code Lines	7

Duplication

Lines	0
Ratio	0 %

Code Coverage

Tests	7
CRAP Score	3

Importance

Changes

Metric	Value
eloc	7
dl	0
loc	50
ccs	7
cts	7
cp	1
rs	10
c	0
b	0
f	0
cc	3
nop	3
crap	3

# -*- coding: utf-8 -*-

# Copyright 2019 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.distance._phonetic_distance.

Phonetic distance.
"""

from __future__ import (
    absolute_import,
    division,
    print_function,
    unicode_literals,
)

from ._distance import _Distance
from ..fingerprint._fingerprint import _Fingerprint
from ..phonetic._phonetic import _Phonetic
from ..stemmer._stemmer import _Stemmer

__all__ = ['PhoneticDistance']


class PhoneticDistance(_Distance):
    """Phonetic distance.

    Phonetic distance applies one or more supplied string transformations to
    words and compares the resulting transformed strings using a supplied
    distance measure.

    A simple example would be to create a 'Soundex distance':

    >>> from abydos.phonetic import Soundex
    >>> soundex = PhoneticDistance(transforms=Soundex())
    >>> soundex.dist('Ashcraft', 'Ashcroft')
    0.0
    >>> soundex.dist('Robert', 'Ashcraft')
    1.0

    .. versionadded:: 0.4.1
    """

    def __init__(
        self, transforms=None, metric=None, encode_alpha=False, **kwargs
    ):
        """Initialize PhoneticDistance instance.

        Parameters
        ----------
        transforms : list or _Phonetic or _Stemmer or _Fingerprint or type
            An instance (or the class itself, which is then instantiated
            without arguments) of a subclass of _Phonetic, _Stemmer, or
            _Fingerprint, or any other callable taking and returning a
            string, or a list (or other iterable) of such items to apply, in
            order, to each input word before computing their distance or
            similarity. If omitted, no transformations will be performed.
        metric : _Distance or type
            An instance of a subclass of _Distance (or the class itself,
            which is then instantiated without arguments), used for
            computing the inputs' distance or similarity after being
            transformed. If omitted, the strings will be compared for
            identity (returning 0.0 if identical, otherwise 1.0, when
            distance is computed).
        encode_alpha : bool
            Set to true to use the encode_alpha method of phonetic algorithms
            whenever possible.
        **kwargs
            Arbitrary keyword arguments

        Raises
        ------
        TypeError
            If a transform is neither callable nor an instance or subclass
            of _Phonetic, _Stemmer, or _Fingerprint, or if metric is neither
            an instance nor a subclass of _Distance.


        .. versionadded:: 0.4.1

        """
        super(PhoneticDistance, self).__init__(**kwargs)

        # After construction, self.transforms holds only plain callables
        # (bound encode/fingerprint/stem methods or user functions).
        self.transforms = [
            self._bind_transform(trans, encode_alpha)
            for trans in self._as_transform_list(transforms)
        ]

        self.metric = metric
        if self.metric:
            if isinstance(self.metric, type) and issubclass(
                self.metric, _Distance
            ):
                self.metric = self.metric()
            elif not isinstance(self.metric, _Distance):
                raise TypeError(
                    '{} has unknown type {}'.format(
                        self.metric, type(self.metric)
                    )
                )

    @staticmethod
    def _as_transform_list(transforms):
        """Normalize the transforms argument to a list of candidates.

        Parameters
        ----------
        transforms : object
            The value passed as ``transforms`` to the constructor

        Returns
        -------
        list
            The candidate transforms, not yet validated

        """
        if not transforms:
            return []
        if isinstance(transforms, (list, tuple)):
            return list(transforms)

        # A lone transform (instance, class, or other callable) is wrapped.
        # Strings are wrapped rather than iterated so that they are reported
        # as a single invalid transform. This module enables
        # unicode_literals, so type('') and type(b'') are the text and byte
        # string types.
        if (
            isinstance(transforms, (_Phonetic, _Fingerprint, _Stemmer))
            or callable(transforms)
            or isinstance(transforms, (type(''), type(b'')))
        ):
            return [transforms]

        # Any other iterable (set, generator, ...) is a collection of
        # transforms; a non-iterable value is passed on as a single
        # candidate and rejected during validation.
        try:
            iterator = iter(transforms)
        except TypeError:
            return [transforms]
        return list(iterator)

    @staticmethod
    def _bind_transform(trans, encode_alpha):
        """Validate one transform and return the callable that applies it.

        Parameters
        ----------
        trans : object
            A candidate transform
        encode_alpha : bool
            Whether to select encode_alpha instead of encode for _Phonetic
            transforms

        Returns
        -------
        callable
            The bound method or callable to apply to each word

        Raises
        ------
        TypeError
            If trans is not a supported transform

        """
        # Supported classes are instantiated with their default arguments.
        if isinstance(trans, type) and issubclass(
            trans, (_Phonetic, _Fingerprint, _Stemmer)
        ):
            trans = trans()

        if isinstance(trans, _Phonetic):
            return trans.encode_alpha if encode_alpha else trans.encode
        if isinstance(trans, _Fingerprint):
            return trans.fingerprint
        if isinstance(trans, _Stemmer):
            return trans.stem
        if callable(trans):
            return trans
        raise TypeError('{} has unknown type {}'.format(trans, type(trans)))

    def _apply_transforms(self, src, tar):
        """Apply every configured transform, in order, to both strings.

        Parameters
        ----------
        src : str
            Source string for comparison
        tar : str
            Target string for comparison

        Returns
        -------
        tuple
            The transformed (src, tar) pair

        """
        for trans in self.transforms:
            src = trans(src)
            tar = trans(tar)
        return src, tar

    def dist_abs(self, src, tar):
        """Return the Phonetic distance.

        Parameters
        ----------
        src : str
            Source string for comparison
        tar : str
            Target string for comparison

        Returns
        -------
        float or int
            The Phonetic distance

        Examples
        --------
        >>> from abydos.phonetic import Soundex
        >>> cmp = PhoneticDistance(Soundex())
        >>> cmp.dist_abs('cat', 'hat')
        1
        >>> cmp.dist_abs('Niall', 'Neil')
        0
        >>> cmp.dist_abs('Colin', 'Cuilen')
        0
        >>> cmp.dist_abs('ATCG', 'TAGC')
        1

        >>> from abydos.distance import Levenshtein
        >>> cmp = PhoneticDistance(transforms=[Soundex], metric=Levenshtein)
        >>> cmp.dist_abs('cat', 'hat')
        1
        >>> cmp.dist_abs('Niall', 'Neil')
        0
        >>> cmp.dist_abs('Colin', 'Cuilen')
        0
        >>> cmp.dist_abs('ATCG', 'TAGC')
        3


        .. versionadded:: 0.4.1

        """
        src, tar = self._apply_transforms(src, tar)
        if self.metric:
            return self.metric.dist_abs(src, tar)
        # Without a metric, fall back to an identity test on the transformed
        # strings: 0 if equal, 1 otherwise.
        return int(src != tar)

    def dist(self, src, tar):
        """Return the normalized Phonetic distance.

        Parameters
        ----------
        src : str
            Source string for comparison
        tar : str
            Target string for comparison

        Returns
        -------
        float
            The normalized Phonetic distance

        Examples
        --------
        >>> from abydos.phonetic import Soundex
        >>> cmp = PhoneticDistance(Soundex())
        >>> cmp.dist('cat', 'hat')
        1.0
        >>> cmp.dist('Niall', 'Neil')
        0.0
        >>> cmp.dist('Colin', 'Cuilen')
        0.0
        >>> cmp.dist('ATCG', 'TAGC')
        1.0

        >>> from abydos.distance import Levenshtein
        >>> cmp = PhoneticDistance(transforms=[Soundex], metric=Levenshtein)
        >>> cmp.dist('cat', 'hat')
        0.25
        >>> cmp.dist('Niall', 'Neil')
        0.0
        >>> cmp.dist('Colin', 'Cuilen')
        0.0
        >>> cmp.dist('ATCG', 'TAGC')
        0.75


        .. versionadded:: 0.4.1

        """
        src, tar = self._apply_transforms(src, tar)
        if self.metric:
            return self.metric.dist(src, tar)
        # Without a metric, fall back to an identity test on the transformed
        # strings: 0.0 if equal, 1.0 otherwise.
        return float(src != tar)


if __name__ == '__main__':
    # Executed as a script: run the doctests embedded in this module's
    # docstrings.
    from doctest import testmod

    testmod()


1		# -- coding: utf-8 --
2
3		# Copyright 2019 by Christopher C. Little.
4		# This file is part of Abydos.
5		#
6		# Abydos is free software: you can redistribute it and/or modify
7		# it under the terms of the GNU General Public License as published by
8		# the Free Software Foundation, either version 3 of the License, or
9		# (at your option) any later version.
10		#
11		# Abydos is distributed in the hope that it will be useful,
12		# but WITHOUT ANY WARRANTY; without even the implied warranty of
13		# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14		# GNU General Public License for more details.
15		#
16		# You should have received a copy of the GNU General Public License
17		# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19	1	"""abydos.distance._phonetic_distance.
20
21		Phonetic distance.
22		"""
23
24	1	from __future__ import (
25		absolute_import,
26		division,
27		print_function,
28		unicode_literals,
29		)
30
31	1	from ._distance import _Distance
32	1	from ..fingerprint._fingerprint import _Fingerprint
33	1	from ..phonetic._phonetic import _Phonetic
34	1	from ..stemmer._stemmer import _Stemmer
35
36	1	__all__ = ['PhoneticDistance']
37
38
39	1	class PhoneticDistance(_Distance):
40		"""Phonetic distance.
41
42		Phonetic distance applies one or more supplied string transformations to
43		words and compares the resulting transformed strings using a supplied
44		distance measure.
45
46		A simple example would be to create a 'Soundex distance':
47
48		>>> from abydos.phonetic import Soundex
49		>>> soundex = PhoneticDistance(transforms=Soundex())
50		>>> soundex.dist('Ashcraft', 'Ashcroft')
51		0.0
52		>>> soundex.dist('Robert', 'Ashcraft')
53		1.0
54
55		.. versionadded:: 0.4.1
56		"""
57
58	1	def __init__(
59		self, transforms=None, metric=None, encode_alpha=False, **kwargs
60		):
61		"""Initialize PhoneticDistance instance.
62
63		Parameters
64		----------
65		transforms : list or _Phonetic or _Stemmer or _Fingerprint or type
66		An instance of a subclass of _Phonetic, _Stemmer, or _Fingerprint,
67		or a list (or other iterable) of such instances to apply to each
68		input word before computing their distance or similarity. If
69		omitted, no transformations will be performed.
70		metric : _Distance or type
71		An instance of a subclass of _Distance, used for computing the
72		inputs' distance or similarity after being transformed. If omitted,
73		the strings will be compared for identity (returning 0.0 if
74		identical, otherwise 1.0, when distance is computed).
75		encode_alpha : bool
76		Set to true to use the encode_alpha method of phonetic algorithms
77		whenever possible.
78		**kwargs
79		Arbitrary keyword arguments
80
81
82		.. versionadded:: 0.4.1
83
84		"""
85	1	super(PhoneticDistance, self).__init__(**kwargs)
86	1	self.transforms = transforms
87	1	if self.transforms:
88	1	if isinstance(self.transforms, (list, tuple)):
89	1	self.transforms = list(self.transforms)
90		else:
91	1	self.transforms = [self.transforms]
92
93	1	for i, trans in enumerate(self.transforms):
94	1	if isinstance(trans, (_Phonetic, _Fingerprint, _Stemmer)):
95	1	continue
96	1	elif isinstance(trans, type) and issubclass(
97		trans, (_Phonetic, _Fingerprint, _Stemmer)
98		):
99	1	self.transforms[i] = trans()
100	1	elif callable(trans):
101	1	continue
102		else:
103	1	raise TypeError(
104		'{} has unknown type {}'.format(trans, type(trans))
105		)
106
107	1	for i, trans in enumerate(self.transforms):
108	1	if isinstance(trans, _Phonetic):
109	1	if encode_alpha:
110	1	self.transforms[i] = self.transforms[i].encode_alpha
111		else:
112	1	self.transforms[i] = self.transforms[i].encode
113	1	elif isinstance(trans, _Fingerprint):
114	1	self.transforms[i] = self.transforms[i].fingerprint
115	1	elif isinstance(trans, _Stemmer):
116	1	self.transforms[i] = self.transforms[i].stem
117
118		else:
119	1	self.transforms = []
120
121	1	self.metric = metric
122	1	if self.metric:
123	1	if isinstance(self.metric, type) and issubclass(
124		self.metric, _Distance
125		):
126	1	self.metric = self.metric()
127	1	elif not isinstance(self.metric, _Distance):
128	1	raise TypeError(
129		'{} has unknown type {}'.format(
130		self.metric, type(self.metric)
131		)
132		)
133
134	1	def dist_abs(self, src, tar):
135		"""Return the Phonetic distance.
136
137		Parameters
138		----------
139		src : str
140		Source string for comparison
141		tar : str
142		Target string for comparison
143
144		Returns
145		-------
146		float or int
147		The Phonetic distance
148
149		Examples
150		--------
151		>>> from abydos.phonetic import Soundex
152		>>> cmp = PhoneticDistance(Soundex())
153		>>> cmp.dist_abs('cat', 'hat')
154		1
155		>>> cmp.dist_abs('Niall', 'Neil')
156		0
157		>>> cmp.dist_abs('Colin', 'Cuilen')
158		0
159		>>> cmp.dist_abs('ATCG', 'TAGC')
160		1
161
162		>>> from abydos.distance import Levenshtein
163		>>> cmp = PhoneticDistance(transforms=[Soundex], metric=Levenshtein)
164		>>> cmp.dist_abs('cat', 'hat')
165		1
166		>>> cmp.dist_abs('Niall', 'Neil')
167		0
168		>>> cmp.dist_abs('Colin', 'Cuilen')
169		0
170		>>> cmp.dist_abs('ATCG', 'TAGC')
171		3
172
173
174		.. versionadded:: 0.4.1
175
176		"""
177	1	for trans in self.transforms:
178	1	src = trans(src)
179	1	tar = trans(tar)
180	1	if self.metric:
181	1	return self.metric.dist_abs(src, tar)
182		else:
183	1	return int(src != tar)
184
185	1	def dist(self, src, tar):
186		"""Return the normalized Phonetic distance.
187
188		Parameters
189		----------
190		src : str
191		Source string for comparison
192		tar : str
193		Target string for comparison
194
195		Returns
196		-------
197		float
198		The normalized Phonetic distance
199
200		Examples
201		--------
202		>>> from abydos.phonetic import Soundex
203		>>> cmp = PhoneticDistance(Soundex())
204		>>> cmp.dist('cat', 'hat')
205		1.0
206		>>> cmp.dist('Niall', 'Neil')
207		0.0
208		>>> cmp.dist('Colin', 'Cuilen')
209		0.0
210		>>> cmp.dist('ATCG', 'TAGC')
211		1.0
212
213		>>> from abydos.distance import Levenshtein
214		>>> cmp = PhoneticDistance(transforms=[Soundex], metric=Levenshtein)
215		>>> cmp.dist('cat', 'hat')
216		0.25
217		>>> cmp.dist('Niall', 'Neil')
218		0.0
219		>>> cmp.dist('Colin', 'Cuilen')
220		0.0
221		>>> cmp.dist('ATCG', 'TAGC')
222		0.75
223
224
225		.. versionadded:: 0.4.1
226
227		"""
228	1	for trans in self.transforms:
229	1	src = trans(src)
230	1	tar = trans(tar)
231	1	if self.metric:
232	1	return self.metric.dist(src, tar)
233		else:
234	1	return float(src != tar)
235
236
237		if __name__ == '__main__':
238		import doctest
239
240		doctest.testmod()
241

chrislit / abydos

Pull Request — master (#225)

PhoneticDistance.dist() A

Complexity

Size

Duplication

Code Coverage

Importance

Duplication Side-by-Side

Filter issues like