abydos.phonetic.caverphone - Code Metrics - Inspection of "Merge pull request #120 from chrislit/modularize" - chrislit/abydos - Measure and Improve Code Quality continuously with Scrutinizer

Test Failed

Push — master ( 64abe2...a464fa )

by Chris

created 2018-10-19 22:32 UTC

abydos.phonetic.caverphone A

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	183
Duplicated Lines	0 %

Importance

Changes

Metric	Value
eloc	112
dl	0
loc	183
rs	9.84
c	0
b	0
f	0
wmc	32

1 Function

Rating	Name	Duplication	Size	Complexity
F	caverphone()	0	149	32

# -*- coding: utf-8 -*-

# Copyright 2014-2018 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.phonetic.caverphone.

The phonetic.caverphone module implements the Caverphone phonetic algorithm.
"""

from __future__ import unicode_literals

__all__ = ['caverphone']


def caverphone(word, version=2):
    """Encode a word with the Caverphone phonetic algorithm.

    Version 1 of the algorithm is described in :cite:`Hood:2002` and
    version 2 in :cite:`Hood:2004`.

    The word is lowercased and reduced to the letters a-z, then rewritten by
    a fixed, order-sensitive series of substitutions. The resulting code is
    padded with ``1`` characters and cut to 6 characters for version 1, or
    to 10 characters for any other version value.

    :param str word: the word to transform
    :param int version: the version of Caverphone to employ for encoding
        (defaults to 2); any value other than 1 selects the version 2 rules
    :returns: the Caverphone value
    :rtype: str

    >>> caverphone('Christopher')
    'KRSTFA1111'
    >>> caverphone('Niall')
    'NA11111111'
    >>> caverphone('Smith')
    'SMT1111111'
    >>> caverphone('Schmidt')
    'SKMT111111'

    >>> caverphone('Christopher', 1)
    'KRSTF1'
    >>> caverphone('Niall', 1)
    'N11111'
    >>> caverphone('Smith', 1)
    'SMT111'
    >>> caverphone('Schmidt', 1)
    'SKMT11'
    """
    is_v1 = version == 1
    letters = set('abcdefghijklmnopqrstuvwxyz')
    vowels = set('aeiou')

    def _substitute(text, pairs):
        """Apply each (old, new) replacement to text, in the given order."""
        for old, new in pairs:
            text = text.replace(old, new)
        return text

    def _collapse(text, letter):
        """Turn every run of letter in text into one uppercase letter."""
        doubled = letter + letter
        while doubled in text:
            text = text.replace(doubled, letter)
        return text.replace(letter, letter.upper())

    # normalise: lowercase, then keep only the letters a-z
    word = ''.join(ch for ch in word.lower() if ch in letters)

    # only the non-version-1 rules drop a single trailing 'e'
    if not is_v1 and word.endswith('e'):
        word = word[:-1]

    # In the rules below the digits '2' and '3' are temporary markers that
    # are deleted near the end; uppercase letters are finished code symbols
    # that later lowercase rules no longer touch. The order of the rules
    # matters and mirrors the published algorithm.
    if word:
        # word-initial '...ough' spellings: the 'gh' becomes '2f'
        ough_prefixes = ('cough', 'rough', 'tough', 'enough')
        if not is_v1:
            ough_prefixes += ('trough',)
        for prefix in ough_prefixes:
            if word.startswith(prefix):
                word = prefix[:-2] + '2f' + word[len(prefix):]

        if word.startswith('gn'):
            word = '2n' + word[2:]
        if word.endswith('mb'):
            word = word[:-1] + '2'

        word = _substitute(word, (
            ('cq', '2q'),
            ('ci', 'si'),
            ('ce', 'se'),
            ('cy', 'sy'),
            ('tch', '2ch'),
            ('c', 'k'),
            ('q', 'k'),
            ('x', 'k'),
            ('v', 'f'),
            ('dg', '2g'),
            ('tio', 'sio'),
            ('tia', 'sia'),
            ('d', 't'),
            ('ph', 'fh'),
            ('b', 'p'),
            ('sh', 's2'),
            ('z', 's'),
        ))

        # a leading vowel becomes 'A'; every other vowel becomes '3'
        if word[0] in vowels:
            word = 'A' + word[1:]
        for vowel in 'aeiou':
            word = word.replace(vowel, '3')

        if not is_v1:
            word = word.replace('j', 'y')
            if word.startswith('y3'):
                word = 'Y3' + word[2:]
            elif word.startswith('y'):
                word = 'A' + word[1:]
            word = word.replace('y', '3')

        word = _substitute(word, (
            ('3gh3', '3kh3'),
            ('gh', '22'),
            ('g', 'k'),
        ))

        # squeeze runs of these consonants down to one uppercase symbol
        for consonant in 'stpkfmn':
            word = _collapse(word, consonant)

        # w
        word = word.replace('w3', 'W3')
        if is_v1:
            word = word.replace('wy', 'Wy')
        word = word.replace('wh3', 'Wh3')
        if is_v1:
            word = word.replace('why', 'Why')
        elif word.endswith('w'):
            word = word[:-1] + '3'
        word = word.replace('w', '2')

        # h
        if word.startswith('h'):
            word = 'A' + word[1:]
        word = word.replace('h', '2')

        # r, then l: both follow the same pattern of rules
        for liquid in 'rl':
            kept = liquid.upper()
            word = word.replace(liquid + '3', kept + '3')
            if is_v1:
                word = word.replace(liquid + 'y', kept + 'y')
            elif word.endswith(liquid):
                word = word[:-1] + '3'
            word = word.replace(liquid, '2')

        if is_v1:
            word = _substitute(word, (
                ('j', 'y'),
                ('y3', 'Y3'),
                ('y', '2'),
            ))

        # drop the '2' markers; outside version 1 a final '3' survives as 'A'
        word = word.replace('2', '')
        if not is_v1 and word.endswith('3'):
            word = word[:-1] + 'A'
        word = word.replace('3', '')

    # pad with 1s, then cut to the code length for the chosen version
    code_length = 6 if is_v1 else 10
    return (word + '1' * 10)[:code_length]


if __name__ == '__main__':
    # Executed as a script: run the doctests embedded in this module.
    from doctest import testmod
    testmod()


1			# -*- coding: utf-8 -*-
2
3			# Copyright 2014-2018 by Christopher C. Little.
4			# This file is part of Abydos.
5			#
6			# Abydos is free software: you can redistribute it and/or modify
7			# it under the terms of the GNU General Public License as published by
8			# the Free Software Foundation, either version 3 of the License, or
9			# (at your option) any later version.
10			#
11			# Abydos is distributed in the hope that it will be useful,
12			# but WITHOUT ANY WARRANTY; without even the implied warranty of
13			# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14			# GNU General Public License for more details.
15			#
16			# You should have received a copy of the GNU General Public License
17			# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19			"""abydos.phonetic.caverphone.
20
21			The phonetic.caverphone module implements the Caverphone phonetic algorithm.
22			"""
23
24			from __future__ import unicode_literals
25
26			__all__ = ['caverphone']
27
28
29			def caverphone(word, version=2):
30			"""Return the Caverphone code for a word.
31
32			A description of version 1 of the algorithm can be found in
33			:cite:`Hood:2002`.
34
35			A description of version 2 of the algorithm can be found in
36			:cite:`Hood:2004`.
37
38			:param str word: the word to transform
39			:param int version: the version of Caverphone to employ for encoding
40			(defaults to 2)
41			:returns: the Caverphone value
42			:rtype: str
43
44			>>> caverphone('Christopher')
45			'KRSTFA1111'
46			>>> caverphone('Niall')
47			'NA11111111'
48			>>> caverphone('Smith')
49			'SMT1111111'
50			>>> caverphone('Schmidt')
51			'SKMT111111'
52
53			>>> caverphone('Christopher', 1)
54			'KRSTF1'
55			>>> caverphone('Niall', 1)
56			'N11111'
57			>>> caverphone('Smith', 1)
58			'SMT111'
59			>>> caverphone('Schmidt', 1)
60			'SKMT11'
61			"""
62			_vowels = {'a', 'e', 'i', 'o', 'u'}
63
64			word = word.lower()
65			word = ''.join(c for c in word if c in
66			{'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l',
67			'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x',
68			'y', 'z'})
69
70			def _squeeze_replace(word, char, new_char):
71			"""Convert strings of char in word to one instance of new_char."""
72			while char * 2 in word:
73			word = word.replace(char * 2, char)
74			return word.replace(char, new_char)
75
76			# the main replacement algorithm
77			if version != 1 and word[-1:] == 'e':
78			word = word[:-1]
79			if word:
80			if word[:5] == 'cough':
81			word = 'cou2f'+word[5:]
82			if word[:5] == 'rough':
83			word = 'rou2f'+word[5:]
84			if word[:5] == 'tough':
85			word = 'tou2f'+word[5:]
86			if word[:6] == 'enough':
87			word = 'enou2f'+word[6:]
88			if version != 1 and word[:6] == 'trough':
89			word = 'trou2f'+word[6:]
90			if word[:2] == 'gn':
91			word = '2n'+word[2:]
92			if word[-2:] == 'mb':
93			word = word[:-1]+'2'
94			word = word.replace('cq', '2q')
95			word = word.replace('ci', 'si')
96			word = word.replace('ce', 'se')
97			word = word.replace('cy', 'sy')
98			word = word.replace('tch', '2ch')
99			word = word.replace('c', 'k')
100			word = word.replace('q', 'k')
101			word = word.replace('x', 'k')
102			word = word.replace('v', 'f')
103			word = word.replace('dg', '2g')
104			word = word.replace('tio', 'sio')
105			word = word.replace('tia', 'sia')
106			word = word.replace('d', 't')
107			word = word.replace('ph', 'fh')
108			word = word.replace('b', 'p')
109			word = word.replace('sh', 's2')
110			word = word.replace('z', 's')
111			if word[0] in _vowels:
112			word = 'A'+word[1:]
113			word = word.replace('a', '3')
114			word = word.replace('e', '3')
115			word = word.replace('i', '3')
116			word = word.replace('o', '3')
117			word = word.replace('u', '3')
118			if version != 1:
119			word = word.replace('j', 'y')
120			if word[:2] == 'y3':
121			word = 'Y3'+word[2:]
122			if word[:1] == 'y':
123			word = 'A'+word[1:]
124			word = word.replace('y', '3')
125			word = word.replace('3gh3', '3kh3')
126			word = word.replace('gh', '22')
127			word = word.replace('g', 'k')
128
129			word = _squeeze_replace(word, 's', 'S')
130			word = _squeeze_replace(word, 't', 'T')
131			word = _squeeze_replace(word, 'p', 'P')
132			word = _squeeze_replace(word, 'k', 'K')
133			word = _squeeze_replace(word, 'f', 'F')
134			word = _squeeze_replace(word, 'm', 'M')
135			word = _squeeze_replace(word, 'n', 'N')
136
137			word = word.replace('w3', 'W3')
138			if version == 1:
139			word = word.replace('wy', 'Wy')
140			word = word.replace('wh3', 'Wh3')
141			if version == 1:
142			word = word.replace('why', 'Why')
143			if version != 1 and word[-1:] == 'w':
144			word = word[:-1]+'3'
145			word = word.replace('w', '2')
146			if word[:1] == 'h':
147			word = 'A'+word[1:]
148			word = word.replace('h', '2')
149			word = word.replace('r3', 'R3')
150			if version == 1:
151			word = word.replace('ry', 'Ry')
152			if version != 1 and word[-1:] == 'r':
153			word = word[:-1]+'3'
154			word = word.replace('r', '2')
155			word = word.replace('l3', 'L3')
156			if version == 1:
157			word = word.replace('ly', 'Ly')
158			if version != 1 and word[-1:] == 'l':
159			word = word[:-1]+'3'
160			word = word.replace('l', '2')
161			if version == 1:
162			word = word.replace('j', 'y')
163			word = word.replace('y3', 'Y3')
164			word = word.replace('y', '2')
165			word = word.replace('2', '')
166			if version != 1 and word[-1:] == '3':
167			word = word[:-1]+'A'
168			word = word.replace('3', '')
169
170			# pad with 1s, then extract the necessary length of code
171			word += '1'*10
172			if version != 1:
173			word = word[:10]
174			else:
175			word = word[:6]
176
177			return word
178
179
180			if __name__ == '__main__':
181			import doctest
182			doctest.testmod()
183

chrislit / abydos

Push — master ( 64abe2...a464fa )

abydos.phonetic.caverphone A

Complexity

Size/Duplication

Importance

1 Function

Duplication Side-by-Side

Filter issues like