abydos.phonetic.roger_root - Code Metrics - Inspection of "applied Black codestyle" - chrislit/abydos - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( 6ed6e1...91db7a )

by Chris

created 2018-10-24 05:47 UTC

abydos.phonetic.roger_root A

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	219
Duplicated Lines	0 %

Test Coverage

Coverage

100%

Importance

Changes

Metric	Value
eloc	147
dl	0
loc	219
ccs	31
cts	31
cp	1
rs	10
c	0
b	0
f	0
wmc	7

1 Function

Rating	Name	Duplication	Size	Complexity
C	roger_root()	0	177	7

# -*- coding: utf-8 -*-

# Copyright 2018 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.phonetic.roger_root.

The phonetic.roger_root module implements the Roger Root phonetic algorithm.
"""

from __future__ import unicode_literals

from unicodedata import normalize as unicode_normalize

from six import text_type
from six.moves import range

from . import _delete_consecutive_repeats

__all__ = ['roger_root']


def roger_root(word, max_length=5, zero_pad=True):
    """Encode a word with the Roger Root phonetic algorithm.

    Roger Root name coding is described in :cite:`Moore:1977`.

    :param str word: the word to encode
    :param int max_length: the length (default 5) to which the returned code
        is truncated
    :param bool zero_pad: if True, right-pad the code with 0s so that it is
        max_length characters long
    :returns: the Roger Root code
    :rtype: str

    >>> roger_root('Christopher')
    '06401'
    >>> roger_root('Niall')
    '02500'
    >>> roger_root('Smith')
    '00310'
    >>> roger_root('Schmidt')
    '06310'
    """
    # Uppercase and NFKD-decompose the input, expand eszett, then discard
    # every character that is not one of the 26 letters A-Z.
    letters = frozenset('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
    decomposed = unicode_normalize('NFKD', text_type(word.upper()))
    decomposed = decomposed.replace('ß', 'SS')
    word = ''.join(char for char in decomposed if char in letters)

    # Codes for the cluster at the very start of the word, keyed first by
    # cluster length. A '*' inside a value keeps the digits on either side of
    # it from being merged by _delete_consecutive_repeats(); all '*' are
    # stripped afterwards.
    initial_codes = {
        1: {
            'A': '1',
            'E': '1',
            'I': '1',
            'O': '1',
            'U': '1',
            'H': '2',
            'J': '3',
            'W': '4',
            'Y': '5',
            'S': '0*0',
            'Z': '0*0',
            'D': '01',
            'T': '01',
            'N': '02',
            'M': '03',
            'R': '04',
            'L': '05',
            'C': '07',
            'G': '07',
            'K': '07',
            'Q': '07',
            'X': '07',
            'F': '08',
            'V': '08',
            'B': '09',
            'P': '09',
        },
        2: {
            'CE': '0*0',
            'CI': '0*0',
            'CY': '0*0',
            'TS': '0*0',
            'GN': '02',
            'KN': '02',
            'PN': '02',
            'GM': '03',
            'WR': '04',
            'CH': '06',
            'SH': '06',
            'DG': '07',
            'GF': '08',
            'PF': '08',
            'PH': '08',
        },
        3: {'SCH': '06', 'TSH': '06'},
        4: {'TSCH': '06'},
    }

    # Codes for clusters after the initial one. Vowels and H, W, Y map to '*',
    # so they contribute no digit but still separate repeated digits.
    medial_codes = {
        1: {
            'A': '*',
            'E': '*',
            'H': '*',
            'I': '*',
            'O': '*',
            'U': '*',
            'W': '*',
            'Y': '*',
            'S': '0',
            'Z': '0',
            'D': '1',
            'T': '1',
            'N': '2',
            'M': '3',
            'R': '4',
            'L': '5',
            'J': '6',
            'C': '7',
            'G': '7',
            'K': '7',
            'Q': '7',
            'X': '7',
            'F': '8',
            'V': '8',
            'B': '9',
            'P': '9',
        },
        2: {
            'CE': '0',
            'CI': '0',
            'CY': '0',
            'TS': '0',
            'CH': '6',
            'SH': '6',
            'DG': '7',
            'PH': '8',
        },
        3: {'SCH': '6', 'TSH': '6'},
        4: {'TSCH': '6'},
    }

    # Candidate cluster lengths, longest first, so that e.g. 'TSCH' wins over
    # 'TS' when both could match.
    widths = list(range(4, 0, -1))

    pieces = []
    idx = 0

    # Encode the word-initial cluster with the initial table.
    for width in widths:
        head = word[:width]
        if head in initial_codes[width]:
            pieces.append(initial_codes[width][head])
            idx = width
            break

    # Encode the remainder with the medial table. Every letter A-Z has a
    # single-character entry there, so each pass consumes at least one letter.
    while idx < len(word):
        for width in widths:  # pragma: no branch
            chunk = word[idx : idx + width]
            if chunk in medial_codes[width]:
                pieces.append(medial_codes[width][chunk])
                idx += width
                break

    # Collapse runs of identical symbols, then drop the '*' separators.
    encoded = _delete_consecutive_repeats(''.join(pieces))
    encoded = encoded.replace('*', '')

    # Pad (if requested) before truncating so short codes reach max_length.
    padding = '0' * max_length if zero_pad else ''
    return (encoded + padding)[:max_length]


if __name__ == '__main__':
    # Running this module as a script executes the doctests in its docstrings.
    from doctest import testmod

    testmod()


1		# -*- coding: utf-8 -*-
2
3		# Copyright 2018 by Christopher C. Little.
4		# This file is part of Abydos.
5		#
6		# Abydos is free software: you can redistribute it and/or modify
7		# it under the terms of the GNU General Public License as published by
8		# the Free Software Foundation, either version 3 of the License, or
9		# (at your option) any later version.
10		#
11		# Abydos is distributed in the hope that it will be useful,
12		# but WITHOUT ANY WARRANTY; without even the implied warranty of
13		# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14		# GNU General Public License for more details.
15		#
16		# You should have received a copy of the GNU General Public License
17		# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19	1	"""abydos.phonetic.roger_root.
20
21		The phonetic.roger_root module implements the Roger Root phonetic algorithm.
22		"""
23
24	1	from __future__ import unicode_literals
25
26	1	from unicodedata import normalize as unicode_normalize
27
28	1	from six import text_type
29	1	from six.moves import range
30
31	1	from . import _delete_consecutive_repeats
32
33	1	__all__ = ['roger_root']
34
35
36	1	def roger_root(word, max_length=5, zero_pad=True):
37		"""Return the Roger Root code for a word.
38
39		This is Roger Root name coding, described in :cite:`Moore:1977`.
40
41		:param str word: the word to transform
42		:param int max_length: the maximum length (default 5) of the code to return
43		:param bool zero_pad: pad the end of the return value with 0s to achieve a
44		max_length string
45		:returns: the Roger Root code
46		:rtype: str
47
48		>>> roger_root('Christopher')
49		'06401'
50		>>> roger_root('Niall')
51		'02500'
52		>>> roger_root('Smith')
53		'00310'
54		>>> roger_root('Schmidt')
55		'06310'
56		"""
57		# uppercase, normalize, decompose, and filter non-A-Z out
58	1	word = unicode_normalize('NFKD', text_type(word.upper()))
59	1	word = word.replace('ß', 'SS')
60	1	word = ''.join(
61		c
62		for c in word
63		if c
64		in {
65		'A',
66		'B',
67		'C',
68		'D',
69		'E',
70		'F',
71		'G',
72		'H',
73		'I',
74		'J',
75		'K',
76		'L',
77		'M',
78		'N',
79		'O',
80		'P',
81		'Q',
82		'R',
83		'S',
84		'T',
85		'U',
86		'V',
87		'W',
88		'X',
89		'Y',
90		'Z',
91		}
92		)
93
94		# '*' is used to prevent combining by _delete_consecutive_repeats()
95	1	_init_patterns = {
96		4: {'TSCH': '06'},
97		3: {'TSH': '06', 'SCH': '06'},
98		2: {
99		'CE': '0*0',
100		'CH': '06',
101		'CI': '0*0',
102		'CY': '0*0',
103		'DG': '07',
104		'GF': '08',
105		'GM': '03',
106		'GN': '02',
107		'KN': '02',
108		'PF': '08',
109		'PH': '08',
110		'PN': '02',
111		'SH': '06',
112		'TS': '0*0',
113		'WR': '04',
114		},
115		1: {
116		'A': '1',
117		'B': '09',
118		'C': '07',
119		'D': '01',
120		'E': '1',
121		'F': '08',
122		'G': '07',
123		'H': '2',
124		'I': '1',
125		'J': '3',
126		'K': '07',
127		'L': '05',
128		'M': '03',
129		'N': '02',
130		'O': '1',
131		'P': '09',
132		'Q': '07',
133		'R': '04',
134		'S': '0*0',
135		'T': '01',
136		'U': '1',
137		'V': '08',
138		'W': '4',
139		'X': '07',
140		'Y': '5',
141		'Z': '0*0',
142		},
143		}
144
145	1	_med_patterns = {
146		4: {'TSCH': '6'},
147		3: {'TSH': '6', 'SCH': '6'},
148		2: {
149		'CE': '0',
150		'CH': '6',
151		'CI': '0',
152		'CY': '0',
153		'DG': '7',
154		'PH': '8',
155		'SH': '6',
156		'TS': '0',
157		},
158		1: {
159		'B': '9',
160		'C': '7',
161		'D': '1',
162		'F': '8',
163		'G': '7',
164		'J': '6',
165		'K': '7',
166		'L': '5',
167		'M': '3',
168		'N': '2',
169		'P': '9',
170		'Q': '7',
171		'R': '4',
172		'S': '0',
173		'T': '1',
174		'V': '8',
175		'X': '7',
176		'Z': '0',
177		'A': '*',
178		'E': '*',
179		'H': '*',
180		'I': '*',
181		'O': '*',
182		'U': '*',
183		'W': '*',
184		'Y': '*',
185		},
186		}
187
188	1	code = ''
189	1	pos = 0
190
191		# Do first digit(s) first
192	1	for num in range(4, 0, -1):
193	1	if word[:num] in _init_patterns[num]:
194	1	code = _init_patterns[num][word[:num]]
195	1	pos += num
196	1	break
197
198		# Then code subsequent digits
199	1	while pos < len(word):
200	1	for num in range(4, 0, -1): # pragma: no branch
201	1	if word[pos : pos + num] in _med_patterns[num]:
202	1	code += _med_patterns[num][word[pos : pos + num]]
203	1	pos += num
204	1	break
205
206	1	code = _delete_consecutive_repeats(code)
207	1	code = code.replace('*', '')
208
209	1	if zero_pad:
210	1	code += '0' * max_length
211
212	1	return code[:max_length]
213
214
215		if __name__ == '__main__':
216		import doctest
217
218		doctest.testmod()
219

chrislit / abydos

Push — master ( 6ed6e1...91db7a )

abydos.phonetic.roger_root A

Complexity

Size/Duplication

Test Coverage

Importance

1 Function

Duplication Side-by-Side

Filter issues like