Completed
Pull Request — master (#225)
by Chris
09:15
created

abydos.fingerprint._consonant.Consonant.__init__()   A

Complexity

Conditions 4

Size

Total Lines 37
Code Lines 12

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 12
CRAP Score 4

Importance

Changes 0
Metric Value
eloc 12
dl 0
loc 37
ccs 12
cts 12
cp 1
rs 9.8
c 0
b 0
f 0
cc 4
nop 4
crap 4
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2019 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19 1
"""abydos.fingerprint._consonant.
20
21
Taft's consonant coding
22
"""
23
24 1
from __future__ import (
25
    absolute_import,
26
    division,
27
    print_function,
28
    unicode_literals,
29
)
30
31 1
from itertools import groupby
32
33 1
from ._fingerprint import _Fingerprint
34
35 1
# Names exported by ``from <this module> import *``.
__all__ = ['Consonant']
36
37
38 1
class Consonant(_Fingerprint):
    """Consonant Coding Fingerprint.

    Based on the consonant coding from :cite:`Taft:1970`, variants 1, 2, 3,
    1-D, 2-D, and 3-D.

    .. versionadded:: 0.4.1
    """

    def __init__(self, variant=1, doubles=True, vowels=None):
        """Initialize Consonant instance.

        Parameters
        ----------
        variant : int
            Selects between Taft's 3 variants, which assign to the vowel set
            one of:

                1. A, E, I, O, & U
                2. A, E, I, O, U, W, & Y
                3. A, E, I, O, U, W, H, & Y

        doubles : bool
            If set to False, each run of identical adjacent letters is
            conflated to a single instance (the "-D" variants). The
            conflation is applied to the whole word before vowels are
            removed.
        vowels : list, set, or str
            Setting vowels to a non-None value overrides the variant setting
            and defines the set of letters to be removed from the input.
            Each element is uppercased before being stored.


        .. versionadded:: 0.4.1

        """
        # The two-argument form of super() must name *this* class; naming the
        # parent would start the MRO lookup after _Fingerprint and skip any
        # initializer it defines.
        super(Consonant, self).__init__()
        self._doubles = doubles

        if vowels is None:
            # Build the vowel set cumulatively: each higher variant extends
            # the set used by the one below it.
            self._vowels = set('AEIOU')
            if variant > 1:
                self._vowels.update('WY')
            if variant > 2:
                self._vowels.add('H')
        else:
            # Input words are uppercased in fingerprint(), so the custom
            # vowel set must be uppercase as well.
            self._vowels = {letter.upper() for letter in vowels}

    def fingerprint(self, word):
        """Return the consonant coding.

        The word is uppercased; its first character is always kept, and
        every later character that belongs to the vowel set is removed.

        Parameters
        ----------
        word : str
            The word to fingerprint

        Returns
        -------
        str
            The consonant coding

        Examples
        --------
        >>> cf = Consonant()
        >>> cf.fingerprint('hat')
        'HT'
        >>> cf.fingerprint('niall')
        'NLL'
        >>> cf.fingerprint('colin')
        'CLN'
        >>> cf.fingerprint('atcg')
        'ATCG'
        >>> cf.fingerprint('entreatment')
        'ENTRTMNT'

        >>> Consonant(doubles=False).fingerprint('niall')
        'NL'


        .. versionadded:: 0.4.1

        """
        # uppercase
        word = word.upper()

        # remove repeats if in -D variant: groupby yields one key per run of
        # identical adjacent characters
        if not self._doubles:
            word = ''.join(char for char, _ in groupby(word))

        # remove vowels, but never the leading character; the slices keep
        # this safe for the empty string
        head, tail = word[:1], word[1:]
        return head + ''.join(
            char for char in tail if char not in self._vowels
        )
127
128
129
if __name__ == '__main__':
    # Executed as a script: run the doctest examples embedded in this module.
    from doctest import testmod

    testmod()
133