Completed
Push — master ( f43547...71985b )
by Chris
12:00 queued 10s
created

statistics_canada()   A

Complexity

Conditions 1

Size

Total Lines 30
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
cc 1
eloc 2
nop 2
dl 0
loc 30
ccs 2
cts 2
cp 1
crap 1
rs 10
c 0
b 0
f 0
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19 1
"""abydos.phonetic._statistics_canada.
20
21
Statistics Canada phonetic encoding
22
"""
23
24 1
from __future__ import (
25
    absolute_import,
26
    division,
27
    print_function,
28
    unicode_literals,
29
)
30
31 1
from unicodedata import normalize as unicode_normalize
32
33 1
from six import text_type
34
35 1
from ._phonetic import _Phonetic
36
37 1
# Names exported by ``from <module> import *``: the encoder class and the
# function-style wrapper around it, both defined below.
__all__ = ['StatisticsCanada', 'statistics_canada']
38
39
40 1
class StatisticsCanada(_Phonetic):
    """Statistics Canada name code.

    No original specification of this algorithm could be located; it may
    only ever have been described in an unpublished technical report, and
    the coding no longer appears to be in use by Statistics Canada. What is
    implemented here instead is the "Census modified Statistics Canada name
    coding procedure".

    That modified procedure is described in Appendix B of
    :cite:`Moore:1977`.
    """

    def encode(self, word, max_length=4):
        """Return the Statistics Canada code for a word.

        The word is uppercased and NFKD-normalized, then reduced to the
        characters found in ``self._uc_set``. The first remaining letter is
        always kept; from the rest, every entry of ``self._uc_vy_set`` is
        removed. The result is passed through
        ``self._delete_consecutive_repeats`` and truncated to ``max_length``.

        Parameters
        ----------
        word : str
            The word to transform
        max_length : int
            The maximum length (default 4) of the code to return

        Returns
        -------
        str
            The Statistics Canada name code value; the empty string when no
            character of the input survives filtering

        Examples
        --------
        >>> pe = StatisticsCanada()
        >>> pe.encode('Christopher')
        'CHRS'
        >>> pe.encode('Niall')
        'NL'
        >>> pe.encode('Smith')
        'SMTH'
        >>> pe.encode('Schmidt')
        'SCHM'

        """
        # Uppercase first, then apply compatibility decomposition so that
        # accented letters split into a base letter plus combining marks.
        normalized = unicode_normalize('NFKD', text_type(word.upper()))
        normalized = normalized.replace('ß', 'SS')

        # Keep only the characters the base class lists in _uc_set
        # (the original comment describes this as filtering non-A-Z out).
        letters = ''.join(ch for ch in normalized if ch in self._uc_set)
        if not letters:
            return ''

        # The leading letter is exempt from the stripping below.
        initial, remainder = letters[0], letters[1:]
        for vowel in self._uc_vy_set:
            remainder = remainder.replace(vowel, '')

        # NOTE(review): _delete_consecutive_repeats is defined on the base
        # class; presumably it collapses runs of one letter -- confirm there.
        code = self._delete_consecutive_repeats(initial + remainder)
        code = code.replace(' ', '')

        return code[:max_length]
96
97
98 1
def statistics_canada(word, max_length=4):
    """Return the Statistics Canada code for a word.

    Function-style convenience wrapper: builds a fresh
    :py:class:`StatisticsCanada` instance and delegates to
    :py:meth:`StatisticsCanada.encode`.

    Parameters
    ----------
    word : str
        The word to transform
    max_length : int
        The maximum length (default 4) of the code to return

    Returns
    -------
    str
        The Statistics Canada name code value

    Examples
    --------
    >>> statistics_canada('Christopher')
    'CHRS'
    >>> statistics_canada('Niall')
    'NL'
    >>> statistics_canada('Smith')
    'SMTH'
    >>> statistics_canada('Schmidt')
    'SCHM'

    """
    encoder = StatisticsCanada()
    return encoder.encode(word, max_length=max_length)
128
129
130
if __name__ == '__main__':
    # When this file is executed directly, run the doctest examples found
    # in this module's docstrings.
    from doctest import testmod

    testmod()
134