Test Failed
Push — master ( d1b33f...9f504a )
by Chris
15:58
created

SPEEDCOPTestCases.test_omission_key()   A

Complexity

Conditions 1

Size

Total Lines 19
Code Lines 15

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 15
nop 1
dl 0
loc 19
rs 9.65
c 0
b 0
f 0
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2014-2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19
"""abydos.tests.test_fingerprint.
20
21
This module contains unit tests for abydos.fingerprint
22
"""
23
24
from __future__ import unicode_literals
25
26
import unittest
27
28
import abydos.phonetic as phonetic
29
from abydos.fingerprint import count_fingerprint, occurrence_fingerprint, \
30
    occurrence_halved_fingerprint, omission_key, phonetic_fingerprint, \
31
    position_fingerprint, qgram_fingerprint, skeleton_key, str_fingerprint, \
32
    synoname_toolcode
33
34
from six.moves import range
35
36
37
# Name variants shared as sample input by the fingerprint tests below.
NIALL = ('Niall', 'Neal', 'Neil', 'Njall', 'Njáll', 'Nigel', 'Neel', 'Nele',
         'Nigelli', 'Nel', 'Kneale', 'Uí Néill', 'O\'Neill', 'MacNeil',
         'MacNele', 'Niall Noígíallach')
40
41
42
class FingerprintTestCases(unittest.TestCase):
    """Test fingerprint functions.

    abydos.fingerprint.str_fingerprint, .qgram_fingerprint, &
    .phonetic_fingerprint
    """

    # Parallel tuples: _testset[i] is an input sentence; _anssetw[i] is the
    # value expected from str_fingerprint, and _anssetq1[i] / _anssetq2[i] are
    # the values expected from qgram_fingerprint when its second argument is
    # 1 / 2 respectively.
    _testset = ('À noite, vovô Kowalsky vê o ímã cair no pé do pingüim \
queixoso e vovó põe açúcar no chá de tâmaras do jabuti feliz.', )
    _anssetw = ('a acucar cair cha de do e feliz ima jabuti kowalsky no noite \
o pe pinguim poe queixoso tamaras ve vovo', )
    _anssetq2 = ('abacadaialamanarasbucachcudedoeaedeieleoetevfeguhaifiminirit\
ixizjakokylilsmamqngnoocoeoiojokoposovowpepipoqurarnsdsksotatetiucueuiutvevowa\
xoyv', )
    _anssetq1 = ('abcdefghijklmnopqrstuvwxyz', )

    def test_str_fingerprint(self):
        """Test abydos.fingerprint.str_fingerprint."""
        # Base case
        self.assertEqual(str_fingerprint(''), '')

        for text, expected in zip(self._testset, self._anssetw):
            self.assertEqual(str_fingerprint(text), expected)

    def test_qgram_fingerprint(self):
        """Test abydos.fingerprint.qgram_fingerprint."""
        # Base case
        self.assertEqual(qgram_fingerprint(''), '')

        for text, expected_q1, expected_q2 in zip(self._testset,
                                                  self._anssetq1,
                                                  self._anssetq2):
            self.assertEqual(qgram_fingerprint(text, 1), expected_q1)
            self.assertEqual(qgram_fingerprint(text, 2), expected_q2)
            # Omitting the second argument is expected to give the same
            # result as passing 2.
            self.assertEqual(qgram_fingerprint(text), expected_q2)

        qgram_fp_niall = ('aliallni', 'aleane', 'eiilne', 'aljallnj',
                          'aljallnj', 'elgeigni', 'eeelne', 'ellene',
                          'elgeiglillni', 'elne', 'aleaknlene', 'eiilinllneui',
                          'eiilllneon', 'accneiilmane', 'accnellemane',
                          'acalchgiiaiglalllnninooi')
        # zip() stops at the shorter sequence, so make sure no name would be
        # silently skipped.
        self.assertEqual(len(NIALL), len(qgram_fp_niall))
        for name, expected in zip(NIALL, qgram_fp_niall):
            self.assertEqual(qgram_fingerprint(name), expected)

    def test_phonetic_fingerprint(self):
        """Test abydos.fingerprint.phonetic_fingerprint."""
        # Base case
        self.assertEqual(phonetic_fingerprint(''), '')

        # All NIALL variants joined into one space-separated phrase
        names = ' '.join(NIALL)

        self.assertEqual(phonetic_fingerprint(names),
                         'a anl mknl njl nklk nl')
        self.assertEqual(phonetic_fingerprint(names, phonetic.phonet),
                         'knile makneil maknele neil nel nele nial nigeli ' +
                         'nigl nil noigialach oneil ui')
        self.assertEqual(phonetic_fingerprint(names, phonetic.soundex),
                         'k540 m254 n240 n242 n400 o540 u000')
102
103
104
class SPEEDCOPTestCases(unittest.TestCase):
    """Test SPEEDCOP functions.

    abydos.fingerprint.skeleton_key & .omission_key
    """

    def test_skeleton_key(self):
        """Test abydos.fingerprint.skeleton_key."""
        # Base case
        self.assertEqual(skeleton_key(''), '')

        # http://dl.acm.org/citation.cfm?id=358048
        self.assertEqual(skeleton_key('chemogenic'), 'CHMGNEOI')
        self.assertEqual(skeleton_key('chemomagnetic'), 'CHMGNTEOAI')
        self.assertEqual(skeleton_key('chemcal'), 'CHMLEA')
        self.assertEqual(skeleton_key('chemcial'), 'CHMLEIA')
        self.assertEqual(skeleton_key('chemical'), 'CHMLEIA')
        self.assertEqual(skeleton_key('chemicial'), 'CHMLEIA')
        self.assertEqual(skeleton_key('chimical'), 'CHMLIA')
        self.assertEqual(skeleton_key('chemiluminescence'), 'CHMLNSEIU')
        self.assertEqual(skeleton_key('chemiluminescent'), 'CHMLNSTEIU')
        self.assertEqual(skeleton_key('chemicals'), 'CHMLSEIA')
        self.assertEqual(skeleton_key('chemically'), 'CHMLYEIA')

    def test_omission_key(self):
        """Test abydos.fingerprint.omission_key."""
        # Base case
        self.assertEqual(omission_key(''), '')

        # http://dl.acm.org/citation.cfm?id=358048
        self.assertEqual(omission_key('microelectronics'), 'MCLNTSRIOE')
        self.assertEqual(omission_key('circumstantial'), 'MCLNTSRIUA')
        self.assertEqual(omission_key('luminescent'), 'MCLNTSUIE')
        self.assertEqual(omission_key('multinucleate'), 'MCLNTUIEA')
        self.assertEqual(omission_key('multinucleon'), 'MCLNTUIEO')
        self.assertEqual(omission_key('cumulene'), 'MCLNUE')
        self.assertEqual(omission_key('luminance'), 'MCLNUIAE')
        self.assertEqual(omission_key('coelomic'), 'MCLOEI')
        self.assertEqual(omission_key('molecule'), 'MCLOEU')
        self.assertEqual(omission_key('cameral'), 'MCLRAE')
        self.assertEqual(omission_key('caramel'), 'MCLRAE')
        self.assertEqual(omission_key('maceral'), 'MCLRAE')
        self.assertEqual(omission_key('lacrimal'), 'MCLRAI')
147
148
149
class LightweightFingerprintsTestCases(unittest.TestCase):
    """Test Cisłak & Grabowski lightweight fingerprint functions.

    abydos.fingerprint.occurrence_fingerprint, .occurrence_halved_fingerprint,
    .count_fingerprint, & .position_fingerprint
    """

    def test_occurrence_fingerprint(self):
        """Test abydos.fingerprint.occurrence_fingerprint."""
        # Base case
        self.assertEqual(occurrence_fingerprint(''), 0)

        # https://arxiv.org/pdf/1711.08475.pdf
        self.assertEqual(occurrence_fingerprint('instance'),
                         0b1110111000010000)

        self.assertEqual(occurrence_fingerprint('inst'),
                         0b0100111000000000)
        self.assertEqual(occurrence_fingerprint('instance', 15),
                         0b111011100001000)
        self.assertEqual(occurrence_fingerprint('instance', 32),
                         0b11101110000100000000000000000000)
        self.assertEqual(occurrence_fingerprint('instance', 64),
                         0b11101110000100000000000000000000 << 32)

    def test_occurrence_halved_fingerprint(self):
        """Test abydos.fingerprint.occurrence_halved_fingerprint."""
        # Base case
        self.assertEqual(occurrence_halved_fingerprint(''), 0)

        # https://arxiv.org/pdf/1711.08475.pdf
        self.assertEqual(occurrence_halved_fingerprint('instance'),
                         0b0110010010111000)

        self.assertEqual(occurrence_halved_fingerprint('inst'),
                         0b0001000010100100)
        self.assertEqual(occurrence_halved_fingerprint('instance', 15),
                         0b0110010010111000)
        self.assertEqual(occurrence_halved_fingerprint('instance', 32),
                         0b01100100101110000000000100000000)
        self.assertEqual(occurrence_halved_fingerprint('instance', 64),
                         0b01100100101110000000000100000000 << 32)

    def test_count_fingerprint(self):
        """Test abydos.fingerprint.count_fingerprint."""
        # Base case
        self.assertEqual(count_fingerprint(''), 0)

        # https://arxiv.org/pdf/1711.08475.pdf
        self.assertEqual(count_fingerprint('instance'),
                         0b0101010001100100)

        self.assertEqual(count_fingerprint('inst'),
                         0b0001000001010100)
        self.assertEqual(count_fingerprint('instance', 15),
                         0b0101010001100100)
        self.assertEqual(count_fingerprint('instance', 32),
                         0b01010100011001000000000100000000)
        self.assertEqual(count_fingerprint('instance', 64),
                         0b01010100011001000000000100000000 << 32)

    def test_position_fingerprint(self):
        """Test abydos.fingerprint.position_fingerprint."""
        # Base case
        self.assertEqual(position_fingerprint(''),
                         0b1111111111111111)

        # https://arxiv.org/pdf/1711.08475.pdf
        self.assertEqual(position_fingerprint('instance'),
                         0b1110111001110001)

        # NOTE(review): this repeats the assertion above verbatim; the
        # sibling tests check 'inst' at this position -- confirm whether a
        # different input was intended here.
        self.assertEqual(position_fingerprint('instance'),
                         0b1110111001110001)
        self.assertEqual(position_fingerprint('instance', 15),
                         0b111011100111000)
        self.assertEqual(position_fingerprint('instance', 32),
                         0b11101110011100000101011111111111)
        self.assertEqual(position_fingerprint('instance', 64),
                         0xee7057ffefffffff)
229
230
231
class SynonameToolcodeTestCases(unittest.TestCase):
    """Test Synoname Toolcode function.

    abydos.fingerprint.synoname_toolcode
    """

    def test_synoname_toolcode(self):
        """Test abydos.fingerprint.synoname_toolcode."""
        # Base case
        self.assertEqual(synoname_toolcode(''), ('', '', '0000000000$$'))

        # from Synoname demo
        self.assertEqual(synoname_toolcode('angelico', 'fra'),
                         ('angelico', 'fra', '0000000308$044a$af'))
        self.assertEqual(synoname_toolcode('Aelst', 'Willem van', ''),
                         ('aelst', 'willem van', '0000001005$143a$awv'))
        self.assertEqual(synoname_toolcode('Afro'),
                         ('afro', '', '0000000004$$a'))
        self.assertEqual(synoname_toolcode('Afro', 'Basaldella'),
                         ('afro', 'basaldella', '0000001004$$ab'))
        self.assertEqual(synoname_toolcode('Albright', 'Ivan'),
                         ('albright', 'ivan', '0000000408$$ai'))
        self.assertEqual(synoname_toolcode('Antonello da Messina'),
                         ('antonello da messina', '', '0000000020$022b$adm'))
        self.assertEqual(synoname_toolcode('Albright', 'Ivan Le Lorraine'),
                         ('albright', 'ivan le lorraine',
                          '0000001608$067b$ail'))
        self.assertEqual(synoname_toolcode('Bazille', 'Frederic',
                                           'Attributed to'),
                         ('bazille', 'frederic', '1000000807$$bf'))
        self.assertEqual(synoname_toolcode('Bazille', 'Frederick',
                                           'Attributed to'),
                         ('bazille', 'frederick', '1000000907$$bf'))
        self.assertEqual(synoname_toolcode('Beerstraaten', 'Jan Abrahamsz.'),
                         ('beerstraaten', 'jan abrahamsz.', '0200001412$$bja'))
        self.assertEqual(synoname_toolcode('Bonifacio di Pitati'),
                         ('bonifacio di pitati', '', '0000000019$035b$bdp'))
        self.assertEqual(synoname_toolcode('Breughel the Younger', 'Jan'),
                         ('breughel the younger', 'jan',
                          '0020000320$134b$btyj'))
        self.assertEqual(synoname_toolcode('Brown', 'W. W.'),
                         ('brown', 'w. w.', '0200000505$$bw'))
        self.assertEqual(synoname_toolcode('Brueghel II (the Younger)', 'Jan'),
                         ('brueghel ii (the younger)', 'jan',
                          '0120490325$049b134b$bityj'))
        self.assertEqual(synoname_toolcode('Brueghel II (the Younger)',
                                           'Pieter', 'Workshop of'),
                         ('brueghel ii (the younger)', 'pieter',
                          '3120490625$049b134b$bityp'))
        self.assertEqual(synoname_toolcode('Bugiardini',
                                           'Guiliano di Piero di Simone'),
                         ('bugiardini', 'guiliano di piero di simone',
                          '0000002710$035b035b$bgdps'))
        self.assertEqual(synoname_toolcode('Caravaggio', '', 'Follower of'),
                         ('caravaggio', '', '3000000010$$c'))
        self.assertEqual(synoname_toolcode('Caravaggio',
                                           'Michelangelo Merisi da',
                                           'Follower of'),
                         ('caravaggio', 'michelangelo merisi da',
                          '3000002210$022a$cmd'))
        self.assertEqual(synoname_toolcode('Oost the Younger', 'Jacob van'),
                         ('oost the younger', 'jacob van',
                          '0020000916$134b143a$otyjv'))

        # additional tests for coverage
        self.assertEqual(synoname_toolcode('Cato the Elder', '', 'Copy of'),
                         ('cato the elder', '', '2010000014$133b$cte'))
        self.assertEqual(synoname_toolcode('Cato, the Elder', normalize=2),
                         ('cato the elder', '', '0110000014$133b$cte'))
        self.assertEqual(synoname_toolcode('Cato the Elder', normalize=2),
                         ('cato the elder', '', '0010000014$133b$cte'))
        # Long multi-sentence input; only the third element of the returned
        # tuple (the toolcode string) is checked here.
        self.assertEqual(synoname_toolcode('Lorem ipsum dolor sit amet, '
                                           'consectetur adipiscing elit, sed '
                                           'do eiusmod tempor incididunt ut '
                                           'labore et dolore magna aliqua. '
                                           'Nulla aliquet porttitor lacus '
                                           'luctus accumsan tortor posuere. '
                                           'Egestas purus viverra accumsan '
                                           'in. Ultrices mi tempus imperdiet '
                                           'nulla malesuada pellentesque '
                                           'elit eget gravida. Proin libero '
                                           'nunc consequat interdum varius '
                                           'sit amet mattis vulputate. '
                                           'Mauris ultrices eros in cursus '
                                           'turpis massa tincidunt dui. '
                                           'Faucibus in ornare quam viverra '
                                           'orci sagittis eu volutpat odio. '
                                           'Enim blandit volutpat maecenas '
                                           'volutpat blandit aliquam etiam. '
                                           'Vel quam elementum pulvinar '
                                           'etiam. Duis ut diam quam nulla '
                                           'porttitor massa id.',
                                           normalize=1)[2],
                         '02000060626$068d$lidsacetumnpvgflo')
        self.assertEqual(synoname_toolcode('Sainte-Vincent'),
                         ('sainte-vincent', '', '0100000014$110c$sv'))
        self.assertEqual(synoname_toolcode('Lorem', 'Sainte-Vincent'),
                         ('lorem', 'sainte-vincent',
                          '0100001405$068d110b$lsvlo'))
        self.assertEqual(synoname_toolcode('Louis II', 'Jean'),
                         ('louis ii', 'jean', '0000490408$049b068d$lijlo'))
        self.assertEqual(synoname_toolcode('Louis', 'Jean II', normalize=2),
                         ('louis ii', 'jean', '0000490705$049a068d$ljilo'))
        self.assertEqual(synoname_toolcode('Louis', 'Jean II ', normalize=2),
                         ('louis ii', 'jean', '0000490805$049b068d$ljilo'))
        self.assertEqual(synoname_toolcode('Louis V.', 'Jean', normalize=2),
                         ('louis v.', 'jean', '0200000408$068d$lvjlo'))
        self.assertEqual(synoname_toolcode('Louis V.', 'Ste.-Jean Ste.',
                                           normalize=2),
                         ('louis v.', 'ste.-jean ste.',
                          '0200001408$068d127b127X$lvsjlo ste'))
342
343
344
if __name__ == '__main__':
    # Run every TestCase defined in this module when executed as a script.
    unittest.main()
346