Completed
Branch master (78a222)
by Chris
14:36
created

abydos.fingerprint._speedcop   A

Complexity

Total Complexity 12

Size/Duplication

Total Lines 200
Duplicated Lines 0 %

Test Coverage

Coverage 100%

Importance

Changes 0
Metric Value
wmc 12
eloc 116
dl 0
loc 200
ccs 32
cts 32
cp 1
rs 10
c 0
b 0
f 0

2 Functions

Rating   Name   Duplication   Size   Complexity  
B skeleton_key() 0 67 6
B omission_key() 0 88 6
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2014-2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19 1
"""abydos.fingerprint._speedcop.
20
21
The fingerprint.speedcop module implements string fingerprints developed by
22
Pollock & Zamora in :cite:`Pollock:1984`:
23
24
    - skeleton key
25
    - omission key
26
"""
27
28 1
from __future__ import unicode_literals
29
30 1
from unicodedata import normalize as unicode_normalize
31
32 1
from six import text_type
33
34 1
__all__ = ['omission_key', 'skeleton_key']
35
36
37 1
def skeleton_key(word):
    """Return the skeleton key.

    The skeleton key of a word is defined in :cite:`Pollock:1984`.

    As implemented here, the key is built from the uppercased,
    NFKD-normalized word restricted to the letters A-Z. It consists of the
    first letter, then the remaining distinct consonants in order of first
    appearance, then the remaining distinct vowels (A, E, I, O, U) in order
    of first appearance. Later occurrences of the first letter are dropped.

    :param str word: the word to transform into its skeleton key
    :returns: the skeleton key (the empty string if the word contains no
        letters A-Z after normalization)
    :rtype: str

    >>> skeleton_key('The quick brown fox jumped over the lazy dog.')
    'THQCKBRWNFXJMPDVLZYGEUIOA'
    >>> skeleton_key('Christopher')
    'CHRSTPIOE'
    >>> skeleton_key('Niall')
    'NLIA'
    """
    _vowels = frozenset('AEIOU')
    _letters = frozenset('ABCDEFGHIJKLMNOPQRSTUVWXYZ')

    # uppercase, decompose accented characters into base letter + combining
    # mark, then keep only A-Z (which discards the combining marks)
    word = unicode_normalize('NFKD', text_type(word.upper()))
    word = ''.join(c for c in word if c in _letters)

    # slice rather than index so that an empty word yields '' instead of
    # raising IndexError
    start = word[0:1]
    consonant_part = []
    vowel_part = []

    # add consonants & vowels to separate lists
    # (omitting the first char & duplicates)
    for char in word[1:]:
        if char == start:
            continue
        part = vowel_part if char in _vowels else consonant_part
        if char not in part:
            part.append(char)

    # return the first char followed by consonants followed by vowels
    return start + ''.join(consonant_part) + ''.join(vowel_part)
104
105
106 1
def omission_key(word):
    """Return the omission key.

    The omission key of a word is defined in :cite:`Pollock:1984`.

    As implemented here, the key is built from the uppercased,
    NFKD-normalized word restricted to the letters A-Z. It consists of the
    distinct consonants present in the word, emitted in the fixed order
    J K Q X Z V W Y B F M G P D H C L N T S R, followed by the distinct
    vowels in the order they first appear in the word.

    :param str word: the word to transform into its omission key
    :returns: the omission key (the empty string if the word contains no
        letters A-Z after normalization)
    :rtype: str

    >>> omission_key('The quick brown fox jumped over the lazy dog.')
    'JKQXZVWYBFMGPDHCLNTREUIOA'
    >>> omission_key('Christopher')
    'PHCTSRIOE'
    >>> omission_key('Niall')
    'LNIA'
    """
    # consonants in the order in which they are emitted into the key
    _consonants = tuple('JKQXZVWYBFMGPDHCLNTSR')
    _consonant_set = frozenset(_consonants)
    _letters = frozenset('ABCDEFGHIJKLMNOPQRSTUVWXYZ')

    # uppercase, decompose accented characters into base letter + combining
    # mark, then keep only A-Z (which discards the combining marks)
    word = unicode_normalize('NFKD', text_type(word.upper()))
    word = ''.join(c for c in word if c in _letters)

    # add consonants in order supplied by _consonants (no duplicates)
    present = frozenset(word)
    consonant_part = [char for char in _consonants if char in present]

    # add vowels in order they appeared in the word (no duplicates)
    vowel_part = []
    for char in word:
        if char not in _consonant_set and char not in vowel_part:
            vowel_part.append(char)

    return ''.join(consonant_part) + ''.join(vowel_part)
194
195
196
# When executed as a script, run the doctests embedded in this module's
# docstrings.
if __name__ == '__main__':
    import doctest

    doctest.testmod()
200