abydos.phonetic.statistics_canada - Code Metrics - Inspection of "applied Black codestyle" - chrislit/abydos - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( 6ed6e1...91db7a )

by Chris

created 2018-10-24 05:47 UTC

abydos.phonetic.statistics_canada A

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	115
Duplicated Lines	0 %

Test Coverage

Coverage

100%

Importance

Changes

Metric	Value
eloc	52
dl	0
loc	115
ccs	19
cts	19
cp	1
rs	10
c	0
b	0
f	0
wmc	3

1 Function

Rating	Name	Duplication	Size	Complexity
B	statistics_canada()	0	73	3

# -*- coding: utf-8 -*-

# Copyright 2018 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.phonetic.statistics_canada.

The phonetic.statistics_canada module implements the Statistics Canada phonetic
encoding.
"""

from __future__ import unicode_literals

from unicodedata import normalize as unicode_normalize

from six import text_type

from . import _delete_consecutive_repeats

__all__ = ['statistics_canada']


def statistics_canada(word, max_length=4):
    """Encode a word with the Statistics Canada name code.

    No copy of the original algorithm description could be found; it may
    exist only in an unpublished technical report, and Statistics Canada no
    longer appears to use the coding. What is implemented here instead is the
    "Census modified Statistics Canada name coding procedure", as given in
    Appendix B of :cite:`Moore:1977`.

    The input is upper-cased and NFKD-normalized, and every character outside
    A-Z is discarded. The first remaining letter is kept unchanged, while A,
    E, I, O, U, and Y are removed from the rest. The result is then passed
    through ``_delete_consecutive_repeats`` and cut to ``max_length``
    characters. An input that contains no A-Z letters yields an empty string.

    :param str word: the word to transform
    :param int max_length: the maximum length (default 4) of the code to return
    :returns: the Statistics Canada name code value
    :rtype: str

    >>> statistics_canada('Christopher')
    'CHRS'
    >>> statistics_canada('Niall')
    'NL'
    >>> statistics_canada('Smith')
    'SMTH'
    >>> statistics_canada('Schmidt')
    'SCHM'
    """
    alphabet = frozenset('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
    vowels = frozenset('AEIOUY')

    # Upper-case before decomposing, so an accented letter contributes its
    # base letter and the combining marks are dropped by the A-Z filter.
    normalized = unicode_normalize('NFKD', text_type(word.upper()))
    # Expand any eszett that is still present after upper-casing.
    normalized = normalized.replace('ß', 'SS')
    letters = ''.join([ch for ch in normalized if ch in alphabet])

    # Nothing encodable was left (empty input or no A-Z letters at all).
    if not letters:
        return ''

    # The leading letter is always retained, even when it is a vowel; only
    # the remainder of the word has its vowels (and Y) stripped.
    head, tail = letters[0], letters[1:]
    consonants = ''.join([ch for ch in tail if ch not in vowels])

    code = _delete_consecutive_repeats(head + consonants).replace(' ', '')
    return code[:max_length]


if __name__ == '__main__':
    # When the module is executed directly, run the docstring examples above
    # as doctests.
    from doctest import testmod

    testmod()


1		# -*- coding: utf-8 -*-
2
3		# Copyright 2018 by Christopher C. Little.
4		# This file is part of Abydos.
5		#
6		# Abydos is free software: you can redistribute it and/or modify
7		# it under the terms of the GNU General Public License as published by
8		# the Free Software Foundation, either version 3 of the License, or
9		# (at your option) any later version.
10		#
11		# Abydos is distributed in the hope that it will be useful,
12		# but WITHOUT ANY WARRANTY; without even the implied warranty of
13		# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14		# GNU General Public License for more details.
15		#
16		# You should have received a copy of the GNU General Public License
17		# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19	1	"""abydos.phonetic.statistics_canada.
20
21		The phonetic.statistics_canada module implements the Statistics Canada phonetic
22		encoding.
23		"""
24
25	1	from __future__ import unicode_literals
26
27	1	from unicodedata import normalize as unicode_normalize
28
29	1	from six import text_type
30
31	1	from . import _delete_consecutive_repeats
32
33	1	__all__ = ['statistics_canada']
34
35
36	1	def statistics_canada(word, max_length=4):
37		"""Return the Statistics Canada code for a word.
38
39		The original description of this algorithm could not be located, and
40		may only have been specified in an unpublished TR. The coding does not
41		appear to be in use by Statistics Canada any longer. In its place, this is
42		an implementation of the "Census modified Statistics Canada name coding
43		procedure".
44
45		The modified version of this algorithm is described in Appendix B of
46		:cite:`Moore:1977`.
47
48		:param str word: the word to transform
49		:param int max_length: the maximum length (default 4) of the code to return
50		:returns: the Statistics Canada name code value
51		:rtype: str
52
53		>>> statistics_canada('Christopher')
54		'CHRS'
55		>>> statistics_canada('Niall')
56		'NL'
57		>>> statistics_canada('Smith')
58		'SMTH'
59		>>> statistics_canada('Schmidt')
60		'SCHM'
61		"""
62		# uppercase, normalize, decompose, and filter non-A-Z out
63	1	word = unicode_normalize('NFKD', text_type(word.upper()))
64	1	word = word.replace('ß', 'SS')
65	1	word = ''.join(
66		c
67		for c in word
68		if c
69		in {
70		'A',
71		'B',
72		'C',
73		'D',
74		'E',
75		'F',
76		'G',
77		'H',
78		'I',
79		'J',
80		'K',
81		'L',
82		'M',
83		'N',
84		'O',
85		'P',
86		'Q',
87		'R',
88		'S',
89		'T',
90		'U',
91		'V',
92		'W',
93		'X',
94		'Y',
95		'Z',
96		}
97		)
98	1	if not word:
99	1	return ''
100
101	1	code = word[1:]
102	1	for vowel in {'A', 'E', 'I', 'O', 'U', 'Y'}:
103	1	code = code.replace(vowel, '')
104	1	code = word[0] + code
105	1	code = _delete_consecutive_repeats(code)
106	1	code = code.replace(' ', '')
107
108	1	return code[:max_length]
109
110
111		if __name__ == '__main__':
112		import doctest
113
114		doctest.testmod()
115

chrislit / abydos

Push — master ( 6ed6e1...91db7a )

abydos.phonetic.statistics_canada A

Complexity

Size/Duplication

Test Coverage

Importance

1 Function

Duplication Side-by-Side

Filter issues like