Completed
Push — master ( cd9fca...2c922f )
by Chris
13:16
created

ChebyshevTestCases.test_sim_chebyshev()   A

Complexity

Conditions 1

Size

Total Lines 25
Code Lines 19

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 19
nop 1
dl 0
loc 25
rs 9.45
c 0
b 0
f 0
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2014-2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19
"""abydos.tests.test_distance.
20
21
This module contains unit tests for abydos.distance
22
"""
23
24
from __future__ import division, unicode_literals
25
26
import math
27
import os
28
import pkgutil
29
import sys
30
import unittest
31
from difflib import SequenceMatcher
32
33
from abydos.compression import ac_train
34
# noinspection PyProtectedMember
35
from abydos.distance import _synoname_strip_punct, bag, chebyshev, \
36
    damerau_levenshtein, dist, dist_bag, dist_baystat, dist_compression, \
37
    dist_cosine, dist_damerau, dist_dice, dist_editex,  dist_euclidean, \
38
    dist_eudex, dist_hamming, dist_ident, dist_indel, dist_jaccard, \
39
    dist_jaro_winkler, dist_lcsseq, dist_lcsstr, dist_length, \
40
    dist_levenshtein, dist_manhattan, dist_minkowski, dist_mlipns, \
41
    dist_monge_elkan, dist_mra, dist_overlap, dist_prefix, \
42
    dist_ratcliff_obershelp, dist_sift4, dist_strcmp95, dist_suffix, \
43
    dist_tversky, dist_typo, editex, euclidean, eudex_hamming, gotoh, \
44
    hamming, lcsseq, lcsstr, levenshtein, manhattan, minkowski, mra_compare, \
45
    needleman_wunsch, sift4_common, sift4_simplest, sim, sim_bag, \
46
    sim_baystat, sim_compression, sim_cosine, sim_damerau,  sim_dice, \
47
    sim_editex, sim_euclidean, sim_eudex, sim_hamming, sim_ident, sim_indel, \
48
    sim_jaccard, sim_jaro_winkler, sim_lcsseq, sim_lcsstr, sim_length, \
49
    sim_levenshtein, sim_manhattan, sim_matrix, sim_minkowski, sim_mlipns, \
50
    sim_monge_elkan, sim_mra, sim_overlap, sim_prefix, \
51
    sim_ratcliff_obershelp, sim_sift4, sim_strcmp95, sim_suffix, \
52
    sim_tanimoto, sim_tversky, sim_typo, smith_waterman, synoname, \
53
    _synoname_word_approximation, tanimoto, typo
54
from abydos.qgram import QGrams
55
56
from six.moves import range
57
58
# Directory that contains this test module.
TESTDIR = os.path.dirname(__file__)

# Spelling variants and related forms of the name "Niall".
NIALL = (
    'Niall', 'Neal', 'Neil', 'Njall', 'Njáll', 'Nigel', 'Neel', 'Nele',
    'Nigelli', 'Nel', 'Kneale', 'Uí Néill', 'O\'Neill', 'MacNeil',
    'MacNele', 'Niall Noígíallach',
)

# Spelling variants and related forms of the name "Colin".
COLIN = (
    'Colin', 'Collin', 'Cullen', 'Cuilen', 'Cailean', 'MacCailean',
    'Cuilén', 'Colle', 'Calum', 'Callum', 'Colinn', 'Colon', 'Colynn',
    'Col', 'Cole', 'Nicolas', 'Nicholas', 'Cailean Mór Caimbeul',
)
67
68
69
class LevenshteinTestCases(unittest.TestCase):
    """Test the Levenshtein family of functions.

    abydos.distance.levenshtein, .dist_levenshtein, .sim_levenshtein,
    .damerau_levenshtein, .dist_damerau, & .sim_damerau
    """

    def test_levenshtein(self):
        """Test abydos.distance.levenshtein."""
        # default mode & cost; apart from the first row, the pairs come from
        # http://oldfashionedsoftware.com/tag/levenshtein-distance/
        plain_cases = (
            ('', '', 0),
            ('a', '', 1),
            ('', 'a', 1),
            ('abc', '', 3),
            ('', 'abc', 3),
            ('', '', 0),
            ('a', 'a', 0),
            ('abc', 'abc', 0),
            ('', 'a', 1),
            ('a', 'ab', 1),
            ('b', 'ab', 1),
            ('ac', 'abc', 1),
            ('abcdefg', 'xabxcdxxefxgx', 6),
            ('a', '', 1),
            ('ab', 'a', 1),
            ('ab', 'b', 1),
            ('abc', 'ac', 1),
            ('xabxcdxxefxgx', 'abcdefg', 6),
            ('a', 'b', 1),
            ('ab', 'ac', 1),
            ('ac', 'bc', 1),
            ('abc', 'axc', 1),
            ('xabxcdxxefxgx', '1ab2cd34ef5g6', 6),
            ('example', 'samples', 3),
            ('sturgeon', 'urgently', 6),
            ('levenshtein', 'frankenstein', 6),
            ('distance', 'difference', 5),
            ('java was neat', 'scala is great', 7),
        )
        for src, tar, expected in plain_cases:
            self.assertEqual(levenshtein(src, tar), expected)

        # https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance
        self.assertEqual(levenshtein('CA', 'ABC', 'dam'), 2)
        self.assertEqual(levenshtein('CA', 'ABC', 'osa'), 3)

        modes = ('lev', 'osa', 'dam')

        # (cost tuple, expected distance, word pairs) exercising, in turn,
        # the cost of insert, of delete, and of substitute
        single_edit_cases = (
            ((5, 7, 10, 10), 5, (('', 'b'), ('a', 'ab'))),
            ((5, 7, 10, 10), 7, (('b', ''), ('ab', 'a'))),
            ((10, 10, 5, 10), 5, (('a', 'b'), ('ac', 'bc'))),
        )
        for cost, expected, pairs in single_edit_cases:
            for src, tar in pairs:
                for mode in modes:
                    self.assertEqual(
                        levenshtein(src, tar, mode, cost=cost), expected)

        # test cost of transpose: (mode, cost tuple, expected distance)
        transpose_cases = (
            ('lev', (10, 10, 10, 5), 20),
            ('osa', (10, 10, 10, 5), 5),
            ('dam', (5, 5, 10, 5), 5),
        )
        for src, tar in (('ab', 'ba'), ('abc', 'bac'), ('cab', 'cba')):
            for mode, cost, expected in transpose_cases:
                self.assertEqual(
                    levenshtein(src, tar, mode, cost=cost), expected)

        # test exception: this cost tuple must be rejected in 'dam' mode
        with self.assertRaises(ValueError):
            levenshtein('ab', 'ba', 'dam', cost=(10, 10, 10, 5))

    def test_dist_levenshtein(self):
        """Test abydos.distance.dist_levenshtein."""
        exact_cases = (
            ('', '', 0),
            ('a', 'a', 0),
            ('ab', 'ab', 0),
            ('', 'a', 1),
            ('', 'ab', 1),
            ('a', 'c', 1),
        )
        for src, tar, expected in exact_cases:
            self.assertEqual(dist_levenshtein(src, tar), expected)

        approx_cases = (
            ('abc', 'ac', 1/3),
            ('abbc', 'ac', 1/2),
            ('abbc', 'abc', 1/4),
        )
        for src, tar, expected in approx_cases:
            self.assertAlmostEqual(dist_levenshtein(src, tar), expected)

    def test_sim_levenshtein(self):
        """Test abydos.distance.sim_levenshtein."""
        exact_cases = (
            ('', '', 1),
            ('a', 'a', 1),
            ('ab', 'ab', 1),
            ('', 'a', 0),
            ('', 'ab', 0),
            ('a', 'c', 0),
        )
        for src, tar, expected in exact_cases:
            self.assertEqual(sim_levenshtein(src, tar), expected)

        approx_cases = (
            ('abc', 'ac', 2/3),
            ('abbc', 'ac', 1/2),
            ('abbc', 'abc', 3/4),
        )
        for src, tar, expected in approx_cases:
            self.assertAlmostEqual(sim_levenshtein(src, tar), expected)

    def test_damerau_levenshtein(self):
        """Test abydos.distance.damerau_levenshtein."""
        # default cost
        for src, tar, expected in (('', '', 0),
                                   ('CA', 'CA', 0),
                                   ('CA', 'ABC', 2)):
            self.assertEqual(damerau_levenshtein(src, tar), expected)

        # explicit cost tuples
        cost_cases = (
            ('', 'b', (5, 7, 10, 10), 5),
            ('a', 'ab', (5, 7, 10, 10), 5),
            ('b', '', (5, 7, 10, 10), 7),
            ('ab', 'a', (5, 7, 10, 10), 7),
            ('a', 'b', (10, 10, 5, 10), 5),
            ('ac', 'bc', (10, 10, 5, 10), 5),
            ('ab', 'ba', (5, 5, 10, 5), 5),
            ('abc', 'bac', (5, 5, 10, 5), 5),
            ('cab', 'cba', (5, 5, 10, 5), 5),
        )
        for src, tar, cost, expected in cost_cases:
            self.assertEqual(
                damerau_levenshtein(src, tar, cost=cost), expected)

        # this cost tuple must be rejected
        with self.assertRaises(ValueError):
            damerau_levenshtein('ab', 'ba', cost=(10, 10, 10, 5))

    def test_dist_damerau(self):
        """Test abydos.distance.dist_damerau."""
        exact_cases = (
            ('', '', 0),
            ('a', 'a', 0),
            ('ab', 'ab', 0),
            ('', 'a', 1),
            ('', 'ab', 1),
            ('a', 'c', 1),
        )
        for src, tar, expected in exact_cases:
            self.assertEqual(dist_damerau(src, tar), expected)

        approx_cases = (
            ('abc', 'ac', 1/3),
            ('abbc', 'ac', 1/2),
            ('abbc', 'abc', 1/4),
            ('CA', 'ABC', 2/3),
        )
        for src, tar, expected in approx_cases:
            self.assertAlmostEqual(dist_damerau(src, tar), expected)

        # explicit cost tuples
        cost_cases = (
            ('', 'b', (5, 7, 10, 10), 1),
            ('a', 'ab', (5, 7, 10, 10), 1/2),
            ('b', '', (5, 7, 10, 10), 1),
            ('ab', 'a', (5, 7, 10, 10), 1/2),
            ('a', 'b', (10, 10, 5, 10), 1/2),
            ('ac', 'bc', (10, 10, 5, 10), 1/4),
            ('ab', 'ba', (5, 5, 10, 5), 1/2),
            ('abc', 'bac', (5, 5, 10, 5), 1/3),
            ('cab', 'cba', (5, 5, 10, 5), 1/3),
        )
        for src, tar, cost, expected in cost_cases:
            self.assertAlmostEqual(
                dist_damerau(src, tar, cost=cost), expected)

        # this cost tuple must be rejected
        with self.assertRaises(ValueError):
            dist_damerau('ab', 'ba', cost=(10, 10, 10, 5))

    def test_sim_damerau(self):
        """Test abydos.distance.sim_damerau."""
        exact_cases = (
            ('', '', 1),
            ('a', 'a', 1),
            ('ab', 'ab', 1),
            ('', 'a', 0),
            ('', 'ab', 0),
            ('a', 'c', 0),
        )
        for src, tar, expected in exact_cases:
            self.assertEqual(sim_damerau(src, tar), expected)

        approx_cases = (
            ('abc', 'ac', 2/3),
            ('abbc', 'ac', 1/2),
            ('abbc', 'abc', 3/4),
            ('CA', 'ABC', 1/3),
        )
        for src, tar, expected in approx_cases:
            self.assertAlmostEqual(sim_damerau(src, tar), expected)

        # explicit cost tuples
        cost_cases = (
            ('', 'b', (5, 7, 10, 10), 0),
            ('a', 'ab', (5, 7, 10, 10), 1/2),
            ('b', '', (5, 7, 10, 10), 0),
            ('ab', 'a', (5, 7, 10, 10), 1/2),
            ('a', 'b', (10, 10, 5, 10), 1/2),
            ('ac', 'bc', (10, 10, 5, 10), 3/4),
            ('ab', 'ba', (5, 5, 10, 5), 1/2),
            ('abc', 'bac', (5, 5, 10, 5), 2/3),
            ('cab', 'cba', (5, 5, 10, 5), 2/3),
        )
        for src, tar, cost, expected in cost_cases:
            self.assertAlmostEqual(
                sim_damerau(src, tar, cost=cost), expected)

        # this cost tuple must be rejected
        with self.assertRaises(ValueError):
            sim_damerau('ab', 'ba', cost=(10, 10, 10, 5))
283
284
285
class HammingTestCases(unittest.TestCase):
    """Test Hamming functions.

    abydos.distance.hamming, .dist_hamming, & .sim_hamming
    """

    def test_hamming(self):
        """Test abydos.distance.hamming."""
        # each row: (positional arguments, expected distance)
        cases = (
            (('', ''), 0),
            (('', '', False), 0),
            (('a', ''), 1),
            (('a', 'a'), 0),
            (('a', 'a', False), 0),
            (('a', 'b'), 1),
            (('a', 'b', False), 1),
            (('abc', 'cba'), 2),
            (('abc', 'cba', False), 2),
            (('abc', ''), 3),
            (('bb', 'cbab'), 3),
            # https://en.wikipedia.org/wiki/Hamming_distance
            (('karolin', 'kathrin'), 3),
            (('karolin', 'kerstin'), 3),
            (('1011101', '1001001'), 2),
            (('2173896', '2233796'), 3),
        )
        for args, expected in cases:
            self.assertEqual(hamming(*args), expected)

        # test exception: unequal lengths with the third argument False
        with self.assertRaises(ValueError):
            hamming('ab', 'a', False)

    def test_dist_hamming(self):
        """Test abydos.distance.dist_hamming."""
        exact_cases = (
            (('', ''), 0),
            (('', '', False), 0),
            (('a', ''), 1),
            (('a', 'a'), 0),
            (('a', 'a', False), 0),
            (('a', 'b'), 1),
            (('a', 'b', False), 1),
            (('abc', ''), 1),
        )
        for args, expected in exact_cases:
            self.assertEqual(dist_hamming(*args), expected)

        approx_cases = (
            (('abc', 'cba'), 2/3),
            (('abc', 'cba', False), 2/3),
            (('bb', 'cbab'), 3/4),
            # https://en.wikipedia.org/wiki/Hamming_distance
            (('karolin', 'kathrin'), 3/7),
            (('karolin', 'kerstin'), 3/7),
            (('1011101', '1001001'), 2/7),
            (('2173896', '2233796'), 3/7),
        )
        for args, expected in approx_cases:
            self.assertAlmostEqual(dist_hamming(*args), expected)

        # test exception: unequal lengths with the third argument False
        with self.assertRaises(ValueError):
            dist_hamming('ab', 'a', False)

    def test_sim_hamming(self):
        """Test abydos.distance.sim_hamming."""
        exact_cases = (
            (('', ''), 1),
            (('', '', False), 1),
            (('a', ''), 0),
            (('a', 'a'), 1),
            (('a', 'a', False), 1),
            (('a', 'b'), 0),
            (('a', 'b', False), 0),
            (('abc', ''), 0),
        )
        for args, expected in exact_cases:
            self.assertEqual(sim_hamming(*args), expected)

        approx_cases = (
            (('abc', 'cba'), 1/3),
            (('abc', 'cba', False), 1/3),
            (('bb', 'cbab'), 1/4),
            # https://en.wikipedia.org/wiki/Hamming_distance
            (('karolin', 'kathrin'), 4/7),
            (('karolin', 'kerstin'), 4/7),
            (('1011101', '1001001'), 5/7),
            (('2173896', '2233796'), 4/7),
        )
        for args, expected in approx_cases:
            self.assertAlmostEqual(sim_hamming(*args), expected)

        # test exception: unequal lengths with the third argument False
        with self.assertRaises(ValueError):
            sim_hamming('ab', 'a', False)
362
363
364
# Sentence pair used by the "non-q-gram" test cases of the test classes
# that follow.
NONQ_FROM, NONQ_TO = (
    'The quick brown fox jumped over the lazy dog.',
    'That brown dog jumped over the fox.',
)
366
367
368
class TverskyIndexTestCases(unittest.TestCase):
    """Test Tversky functions.

    abydos.distance.sim_tversky & .dist_tversky
    """

    def test_sim_tversky(self):
        """Test abydos.distance.sim_tversky."""
        # each row: (positional arguments, expected similarity)
        exact_cases = (
            (('', ''), 1),
            (('nelson', ''), 0),
            (('', 'neilsen'), 0),
            (('', '', 2), 1),
            (('nelson', '', 2), 0),
            (('', 'neilsen', 2), 0),
            # supplied q-gram tests
            ((QGrams(''), QGrams('')), 1),
            ((QGrams('nelson'), QGrams('')), 0),
            ((QGrams(''), QGrams('neilsen')), 0),
            # non-q-gram tests
            (('', '', 0), 1),
            (('the quick', '', 0), 0),
            (('', 'the quick', 0), 0),
        )
        for args, expected in exact_cases:
            self.assertEqual(sim_tversky(*args), expected)

        approx_cases = (
            (('nelson', 'neilsen'), 4/11),
            (('nelson', 'neilsen', 2), 4/11),
            # test empty QGrams
            (('nelson', 'neilsen', 7), 0.0),
            # test unequal alpha & beta
            (('niall', 'neal', 2, 2, 1), 3/11),
            (('niall', 'neal', 2, 1, 2), 3/10),
            (('niall', 'neal', 2, 2, 2), 3/13),
            # test bias parameter
            (('niall', 'neal', 2, 1, 1, 0.5), 7/11),
            (('niall', 'neal', 2, 2, 1, 0.5), 7/9),
            (('niall', 'neal', 2, 1, 2, 0.5), 7/15),
            (('niall', 'neal', 2, 2, 2, 0.5), 7/11),
            # supplied q-gram tests
            ((QGrams('nelson'), QGrams('neilsen')), 4/11),
            # non-q-gram tests
            ((NONQ_FROM, NONQ_TO, 0), 1/3),
            ((NONQ_TO, NONQ_FROM, 0), 1/3),
        )
        for args, expected in approx_cases:
            self.assertAlmostEqual(sim_tversky(*args), expected)

        # test valid alpha & beta: a negative value must be rejected
        for alpha, beta in ((-1, -1), (-1, 0), (0, -1)):
            with self.assertRaises(ValueError):
                sim_tversky('abcd', 'dcba', 2, alpha, beta)

    def test_dist_tversky(self):
        """Test abydos.distance.dist_tversky."""
        # each row: (positional arguments, expected distance)
        exact_cases = (
            (('', ''), 0),
            (('nelson', ''), 1),
            (('', 'neilsen'), 1),
            (('', '', 2), 0),
            (('nelson', '', 2), 1),
            (('', 'neilsen', 2), 1),
            # supplied q-gram tests
            ((QGrams(''), QGrams('')), 0),
            ((QGrams('nelson'), QGrams('')), 1),
            ((QGrams(''), QGrams('neilsen')), 1),
            # non-q-gram tests
            (('', '', 0), 0),
            (('the quick', '', 0), 1),
            (('', 'the quick', 0), 1),
        )
        for args, expected in exact_cases:
            self.assertEqual(dist_tversky(*args), expected)

        approx_cases = (
            (('nelson', 'neilsen'), 7/11),
            (('nelson', 'neilsen', 2), 7/11),
            # test empty QGrams
            (('nelson', 'neilsen', 7), 1.0),
            # test unequal alpha & beta
            (('niall', 'neal', 2, 2, 1), 8/11),
            (('niall', 'neal', 2, 1, 2), 7/10),
            (('niall', 'neal', 2, 2, 2), 10/13),
            # test bias parameter
            (('niall', 'neal', 2, 1, 1, 0.5), 4/11),
            (('niall', 'neal', 2, 2, 1, 0.5), 2/9),
            (('niall', 'neal', 2, 1, 2, 0.5), 8/15),
            (('niall', 'neal', 2, 2, 2, 0.5), 4/11),
            # supplied q-gram tests
            ((QGrams('nelson'), QGrams('neilsen')), 7/11),
            # non-q-gram tests
            ((NONQ_FROM, NONQ_TO, 0), 2/3),
            ((NONQ_TO, NONQ_FROM, 0), 2/3),
        )
        for args, expected in approx_cases:
            self.assertAlmostEqual(dist_tversky(*args), expected)

        # test valid alpha & beta: a negative value must be rejected
        for alpha, beta in ((-1, -1), (-1, 0), (0, -1)):
            with self.assertRaises(ValueError):
                dist_tversky('abcd', 'dcba', 2, alpha, beta)
470
471
472
class DiceTestCases(unittest.TestCase):
    """Test Dice functions.

    abydos.distance.sim_dice & .dist_dice
    """

    def test_sim_dice(self):
        """Test abydos.distance.sim_dice."""
        # each row: (positional arguments, expected similarity)
        exact_cases = (
            (('', ''), 1),
            (('nelson', ''), 0),
            (('', 'neilsen'), 0),
            (('', '', 2), 1),
            (('nelson', '', 2), 0),
            (('', 'neilsen', 2), 0),
            # supplied q-gram tests
            ((QGrams(''), QGrams('')), 1),
            ((QGrams('nelson'), QGrams('')), 0),
            ((QGrams(''), QGrams('neilsen')), 0),
            # non-q-gram tests
            (('', '', 0), 1),
            (('the quick', '', 0), 0),
            (('', 'the quick', 0), 0),
        )
        for args, expected in exact_cases:
            self.assertEqual(sim_dice(*args), expected)

        approx_cases = (
            (('nelson', 'neilsen'), 8/15),
            (('nelson', 'neilsen', 2), 8/15),
            # supplied q-gram tests
            ((QGrams('nelson'), QGrams('neilsen')), 8/15),
            # non-q-gram tests
            ((NONQ_FROM, NONQ_TO, 0), 1/2),
            ((NONQ_TO, NONQ_FROM, 0), 1/2),
        )
        for args, expected in approx_cases:
            self.assertAlmostEqual(sim_dice(*args), expected)

    def test_dist_dice(self):
        """Test abydos.distance.dist_dice."""
        # each row: (positional arguments, expected distance)
        exact_cases = (
            (('', ''), 0),
            (('nelson', ''), 1),
            (('', 'neilsen'), 1),
            (('', '', 2), 0),
            (('nelson', '', 2), 1),
            (('', 'neilsen', 2), 1),
            # supplied q-gram tests
            ((QGrams(''), QGrams('')), 0),
            ((QGrams('nelson'), QGrams('')), 1),
            ((QGrams(''), QGrams('neilsen')), 1),
            # non-q-gram tests
            (('', '', 0), 0),
            (('the quick', '', 0), 1),
            (('', 'the quick', 0), 1),
        )
        for args, expected in exact_cases:
            self.assertEqual(dist_dice(*args), expected)

        approx_cases = (
            (('nelson', 'neilsen'), 7/15),
            (('nelson', 'neilsen', 2), 7/15),
            # supplied q-gram tests
            ((QGrams('nelson'), QGrams('neilsen')), 7/15),
            # non-q-gram tests
            ((NONQ_FROM, NONQ_TO, 0), 1/2),
            ((NONQ_TO, NONQ_FROM, 0), 1/2),
        )
        for args, expected in approx_cases:
            self.assertAlmostEqual(dist_dice(*args), expected)
529
530
531
class JaccardTestCases(unittest.TestCase):
    """Test Jaccard functions.

    abydos.distance.sim_jaccard & .dist_jaccard
    """

    def test_sim_jaccard(self):
        """Test abydos.distance.sim_jaccard."""
        # each row: (positional arguments, expected similarity)
        exact_cases = (
            (('', ''), 1),
            (('nelson', ''), 0),
            (('', 'neilsen'), 0),
            (('', '', 2), 1),
            (('nelson', '', 2), 0),
            (('', 'neilsen', 2), 0),
            # supplied q-gram tests
            ((QGrams(''), QGrams('')), 1),
            ((QGrams('nelson'), QGrams('')), 0),
            ((QGrams(''), QGrams('neilsen')), 0),
            # non-q-gram tests
            (('', '', 0), 1),
            (('the quick', '', 0), 0),
            (('', 'the quick', 0), 0),
        )
        for args, expected in exact_cases:
            self.assertEqual(sim_jaccard(*args), expected)

        approx_cases = (
            (('nelson', 'neilsen'), 4/11),
            (('nelson', 'neilsen', 2), 4/11),
            # supplied q-gram tests
            ((QGrams('nelson'), QGrams('neilsen')), 4/11),
            # non-q-gram tests
            ((NONQ_FROM, NONQ_TO, 0), 1/3),
            ((NONQ_TO, NONQ_FROM, 0), 1/3),
        )
        for args, expected in approx_cases:
            self.assertAlmostEqual(sim_jaccard(*args), expected)

    def test_dist_jaccard(self):
        """Test abydos.distance.dist_jaccard."""
        # each row: (positional arguments, expected distance)
        exact_cases = (
            (('', ''), 0),
            (('nelson', ''), 1),
            (('', 'neilsen'), 1),
            (('', '', 2), 0),
            (('nelson', '', 2), 1),
            (('', 'neilsen', 2), 1),
            # supplied q-gram tests
            ((QGrams(''), QGrams('')), 0),
            ((QGrams('nelson'), QGrams('')), 1),
            ((QGrams(''), QGrams('neilsen')), 1),
            # non-q-gram tests
            (('', '', 0), 0),
            (('the quick', '', 0), 1),
            (('', 'the quick', 0), 1),
        )
        for args, expected in exact_cases:
            self.assertEqual(dist_jaccard(*args), expected)

        approx_cases = (
            (('nelson', 'neilsen'), 7/11),
            (('nelson', 'neilsen', 2), 7/11),
            # supplied q-gram tests
            ((QGrams('nelson'), QGrams('neilsen')), 7/11),
            # non-q-gram tests
            ((NONQ_FROM, NONQ_TO, 0), 2/3),
            ((NONQ_TO, NONQ_FROM, 0), 2/3),
        )
        for args, expected in approx_cases:
            self.assertAlmostEqual(dist_jaccard(*args), expected)
588
589
590
class OverlapTestCases(unittest.TestCase):
    """Test overlap functions.

    abydos.distance.sim_overlap & .dist_overlap
    """

    def test_sim_overlap(self):
        """Test abydos.distance.sim_overlap."""
        # each row: (positional arguments, expected similarity)
        exact_cases = (
            (('', ''), 1),
            (('nelson', ''), 0),
            (('', 'neilsen'), 0),
            (('', '', 2), 1),
            (('nelson', '', 2), 0),
            (('', 'neilsen', 2), 0),
            # supplied q-gram tests
            ((QGrams(''), QGrams('')), 1),
            ((QGrams('nelson'), QGrams('')), 0),
            ((QGrams(''), QGrams('neilsen')), 0),
            # non-q-gram tests
            (('', '', 0), 1),
            (('the quick', '', 0), 0),
            (('', 'the quick', 0), 0),
        )
        for args, expected in exact_cases:
            self.assertEqual(sim_overlap(*args), expected)

        approx_cases = (
            (('nelson', 'neilsen'), 4/7),
            (('nelson', 'neilsen', 2), 4/7),
            # supplied q-gram tests
            ((QGrams('nelson'), QGrams('neilsen')), 4/7),
            # non-q-gram tests
            ((NONQ_FROM, NONQ_TO, 0), 4/7),
            ((NONQ_TO, NONQ_FROM, 0), 4/7),
        )
        for args, expected in approx_cases:
            self.assertAlmostEqual(sim_overlap(*args), expected)

    def test_dist_overlap(self):
        """Test abydos.distance.dist_overlap."""
        # each row: (positional arguments, expected distance)
        exact_cases = (
            (('', ''), 0),
            (('nelson', ''), 1),
            (('', 'neilsen'), 1),
            (('', '', 2), 0),
            (('nelson', '', 2), 1),
            (('', 'neilsen', 2), 1),
            # supplied q-gram tests
            ((QGrams(''), QGrams('')), 0),
            ((QGrams('nelson'), QGrams('')), 1),
            ((QGrams(''), QGrams('neilsen')), 1),
            # non-q-gram tests
            (('', '', 0), 0),
            (('the quick', '', 0), 1),
            (('', 'the quick', 0), 1),
        )
        for args, expected in exact_cases:
            self.assertEqual(dist_overlap(*args), expected)

        approx_cases = (
            (('nelson', 'neilsen'), 3/7),
            (('nelson', 'neilsen', 2), 3/7),
            # supplied q-gram tests
            ((QGrams('nelson'), QGrams('neilsen')), 3/7),
            # non-q-gram tests
            ((NONQ_FROM, NONQ_TO, 0), 3/7),
            ((NONQ_TO, NONQ_FROM, 0), 3/7),
        )
        for args, expected in approx_cases:
            self.assertAlmostEqual(dist_overlap(*args), expected)
647
648
649
class TanimotoTestCases(unittest.TestCase):
    """Test Tanimoto functions.

    abydos.distance.sim_tanimoto & .tanimoto
    """

    def test_tanimoto_coeff(self):
        """Test abydos.distance.sim_tanimoto.

        Exercises the default call, an explicit third argument of 2,
        pre-built QGrams operands, and a third argument of 0.
        """
        # default arguments
        self.assertEqual(sim_tanimoto('', ''), 1)
        self.assertEqual(sim_tanimoto('nelson', ''), 0)
        self.assertEqual(sim_tanimoto('', 'neilsen'), 0)
        self.assertAlmostEqual(sim_tanimoto('nelson', 'neilsen'), 4/11)

        # explicit third argument of 2 (expected to match the defaults)
        self.assertEqual(sim_tanimoto('', '', 2), 1)
        self.assertEqual(sim_tanimoto('nelson', '', 2), 0)
        self.assertEqual(sim_tanimoto('', 'neilsen', 2), 0)
        self.assertAlmostEqual(sim_tanimoto('nelson', 'neilsen', 2), 4/11)

        # supplied q-gram tests
        self.assertEqual(sim_tanimoto(QGrams(''), QGrams('')), 1)
        self.assertEqual(sim_tanimoto(QGrams('nelson'), QGrams('')), 0)
        self.assertEqual(sim_tanimoto(QGrams(''), QGrams('neilsen')), 0)
        self.assertAlmostEqual(sim_tanimoto(QGrams('nelson'),
                                            QGrams('neilsen')), 4/11)

        # non-q-gram tests
        self.assertEqual(sim_tanimoto('', '', 0), 1)
        self.assertEqual(sim_tanimoto('the quick', '', 0), 0)
        self.assertEqual(sim_tanimoto('', 'the quick', 0), 0)
        self.assertAlmostEqual(sim_tanimoto(NONQ_FROM, NONQ_TO, 0), 1/3)
        self.assertAlmostEqual(sim_tanimoto(NONQ_TO, NONQ_FROM, 0), 1/3)

    def test_tanimoto(self):
        """Test abydos.distance.tanimoto.

        Every expected value is the base-2 logarithm of the value asserted
        for the same inputs in test_tanimoto_coeff: 0 where the coefficient
        is 1, and negative infinity where the coefficient is 0.
        """
        # default arguments
        self.assertEqual(tanimoto('', ''), 0)
        self.assertEqual(tanimoto('nelson', ''), float('-inf'))
        self.assertEqual(tanimoto('', 'neilsen'), float('-inf'))
        self.assertAlmostEqual(tanimoto('nelson', 'neilsen'),
                               math.log(4/11, 2))

        # explicit third argument of 2 (expected to match the defaults)
        self.assertEqual(tanimoto('', '', 2), 0)
        self.assertEqual(tanimoto('nelson', '', 2), float('-inf'))
        self.assertEqual(tanimoto('', 'neilsen', 2), float('-inf'))
        self.assertAlmostEqual(tanimoto('nelson', 'neilsen', 2),
                               math.log(4/11, 2))

        # supplied q-gram tests
        self.assertEqual(tanimoto(QGrams(''), QGrams('')), 0)
        self.assertEqual(tanimoto(QGrams('nelson'), QGrams('')), float('-inf'))
        self.assertEqual(tanimoto(QGrams(''), QGrams('neilsen')),
                         float('-inf'))
        self.assertAlmostEqual(tanimoto(QGrams('nelson'), QGrams('neilsen')),
                               math.log(4/11, 2))

        # non-q-gram tests
        self.assertEqual(tanimoto('', '', 0), 0)
        self.assertEqual(tanimoto('the quick', '', 0), float('-inf'))
        self.assertEqual(tanimoto('', 'the quick', 0), float('-inf'))
        self.assertAlmostEqual(tanimoto(NONQ_FROM, NONQ_TO, 0),
                               math.log(1/3, 2))
        self.assertAlmostEqual(tanimoto(NONQ_TO, NONQ_FROM, 0),
                               math.log(1/3, 2))
711
712
713
class MinkowskiTestCases(unittest.TestCase):
    """Test Minkowski functions.

    abydos.distance.minkowski, sim_minkowski & .dist_minkowski
    """

    def test_minkowski(self):
        """Test abydos.distance.minkowski.

        Besides the usual default / explicit-2 / QGrams / 0 variants of the
        third argument, this also exercises a fourth positional argument of
        0 (the l_0 "norm"), a fifth positional argument of True, and the
        ``normalized`` and ``alphabet`` keyword arguments.
        """
        # default arguments
        self.assertEqual(minkowski('', ''), 0)
        self.assertEqual(minkowski('nelson', ''), 7)
        self.assertEqual(minkowski('', 'neilsen'), 8)
        self.assertAlmostEqual(minkowski('nelson', 'neilsen'), 7)

        # explicit third argument of 2 (expected to match the defaults)
        self.assertEqual(minkowski('', '', 2), 0)
        self.assertEqual(minkowski('nelson', '', 2), 7)
        self.assertEqual(minkowski('', 'neilsen', 2), 8)
        self.assertAlmostEqual(minkowski('nelson', 'neilsen', 2), 7)

        # supplied q-gram tests
        self.assertEqual(minkowski(QGrams(''), QGrams('')), 0)
        self.assertEqual(minkowski(QGrams('nelson'), QGrams('')), 7)
        self.assertEqual(minkowski(QGrams(''), QGrams('neilsen')), 8)
        self.assertAlmostEqual(minkowski(QGrams('nelson'),
                                         QGrams('neilsen')), 7)

        # non-q-gram tests
        self.assertEqual(minkowski('', '', 0), 0)
        self.assertEqual(minkowski('the quick', '', 0), 2)
        self.assertEqual(minkowski('', 'the quick', 0), 2)
        self.assertAlmostEqual(minkowski(NONQ_FROM, NONQ_TO, 0), 8)
        self.assertAlmostEqual(minkowski(NONQ_TO, NONQ_FROM, 0), 8)

        # test l_0 "norm"
        self.assertEqual(minkowski('', '', 1, 0), 0)
        self.assertEqual(minkowski('a', '', 1, 0), 1)
        self.assertEqual(minkowski('a', 'b', 1, 0), 2)
        self.assertEqual(minkowski('ab', 'b', 1, 0), 1)
        self.assertEqual(minkowski('aab', 'b', 1, 0), 1)
        # same norm with the fifth positional argument set to True
        # (presumably ``normalized`` -- compare the keyword calls below)
        self.assertEqual(minkowski('', '', 1, 0, True), 0)
        self.assertEqual(minkowski('a', '', 1, 0, True), 1)
        self.assertEqual(minkowski('a', 'b', 1, 0, True), 1)
        self.assertEqual(minkowski('ab', 'b', 1, 0, True), 1/2)
        self.assertEqual(minkowski('aab', 'b', 1, 0, True), 1/2)
        self.assertEqual(minkowski('aaab', 'b', 1, 0, True), 1/2)
        self.assertEqual(minkowski('aaab', 'ab', 1, 0, True), 1/2)

        # test with alphabet
        self.assertEqual(minkowski('ab', 'b', 1, alphabet=26), 1)
        # 1/26 is not exactly representable in binary floating point, so
        # compare approximately rather than relying on bit-identical results
        self.assertAlmostEqual(minkowski('ab', 'b', 1, normalized=True,
                                         alphabet=26),
                               1/26)
        self.assertAlmostEqual(minkowski('ab', 'b', 1, normalized=True,
                                         alphabet='abcdefghijklmnopqrstuvwxyz'),
                               1/26)

    def test_sim_minkowski(self):
        """Test abydos.distance.sim_minkowski.

        Exercises the default call, an explicit third argument of 2,
        pre-built QGrams operands, and a third argument of 0.
        """
        # default arguments
        self.assertEqual(sim_minkowski('', ''), 1)
        self.assertEqual(sim_minkowski('nelson', ''), 0)
        self.assertEqual(sim_minkowski('', 'neilsen'), 0)
        self.assertAlmostEqual(sim_minkowski('nelson', 'neilsen'), 8/15)

        # explicit third argument of 2 (expected to match the defaults)
        self.assertEqual(sim_minkowski('', '', 2), 1)
        self.assertEqual(sim_minkowski('nelson', '', 2), 0)
        self.assertEqual(sim_minkowski('', 'neilsen', 2), 0)
        self.assertAlmostEqual(sim_minkowski('nelson', 'neilsen', 2), 8/15)

        # supplied q-gram tests
        self.assertEqual(sim_minkowski(QGrams(''), QGrams('')), 1)
        self.assertEqual(sim_minkowski(QGrams('nelson'), QGrams('')), 0)
        self.assertEqual(sim_minkowski(QGrams(''), QGrams('neilsen')), 0)
        self.assertAlmostEqual(sim_minkowski(QGrams('nelson'),
                                             QGrams('neilsen')), 8/15)

        # non-q-gram tests
        self.assertEqual(sim_minkowski('', '', 0), 1)
        self.assertEqual(sim_minkowski('the quick', '', 0), 0)
        self.assertEqual(sim_minkowski('', 'the quick', 0), 0)
        self.assertAlmostEqual(sim_minkowski(NONQ_FROM, NONQ_TO, 0), 1/2)
        self.assertAlmostEqual(sim_minkowski(NONQ_TO, NONQ_FROM, 0), 1/2)

    def test_dist_minkowski(self):
        """Test abydos.distance.dist_minkowski.

        Each expected value is one minus the value asserted for the same
        inputs in test_sim_minkowski.
        """
        # default arguments
        self.assertEqual(dist_minkowski('', ''), 0)
        self.assertEqual(dist_minkowski('nelson', ''), 1)
        self.assertEqual(dist_minkowski('', 'neilsen'), 1)
        self.assertAlmostEqual(dist_minkowski('nelson', 'neilsen'), 7/15)

        # explicit third argument of 2 (expected to match the defaults)
        self.assertEqual(dist_minkowski('', '', 2), 0)
        self.assertEqual(dist_minkowski('nelson', '', 2), 1)
        self.assertEqual(dist_minkowski('', 'neilsen', 2), 1)
        self.assertAlmostEqual(dist_minkowski('nelson', 'neilsen', 2), 7/15)

        # supplied q-gram tests
        self.assertEqual(dist_minkowski(QGrams(''), QGrams('')), 0)
        self.assertEqual(dist_minkowski(QGrams('nelson'), QGrams('')), 1)
        self.assertEqual(dist_minkowski(QGrams(''), QGrams('neilsen')), 1)
        self.assertAlmostEqual(dist_minkowski(QGrams('nelson'),
                                              QGrams('neilsen')), 7/15)

        # non-q-gram tests
        self.assertEqual(dist_minkowski('', '', 0), 0)
        self.assertEqual(dist_minkowski('the quick', '', 0), 1)
        self.assertEqual(dist_minkowski('', 'the quick', 0), 1)
        self.assertAlmostEqual(dist_minkowski(NONQ_FROM, NONQ_TO, 0), 1/2)
        self.assertAlmostEqual(dist_minkowski(NONQ_TO, NONQ_FROM, 0), 1/2)
818
819
820
class ManhattanTestCases(unittest.TestCase):
    """Test Manhattan functions.

    abydos.distance.manhattan, sim_manhattan & .dist_manhattan
    """

    def test_manhattan(self):
        """Test abydos.distance.manhattan.

        Exercises the default call, an explicit third argument of 2,
        pre-built QGrams operands, and a third argument of 0.
        """
        # default arguments
        self.assertEqual(manhattan('', ''), 0)
        self.assertEqual(manhattan('nelson', ''), 7)
        self.assertEqual(manhattan('', 'neilsen'), 8)
        self.assertAlmostEqual(manhattan('nelson', 'neilsen'), 7)

        # explicit third argument of 2 (expected to match the defaults)
        self.assertEqual(manhattan('', '', 2), 0)
        self.assertEqual(manhattan('nelson', '', 2), 7)
        self.assertEqual(manhattan('', 'neilsen', 2), 8)
        self.assertAlmostEqual(manhattan('nelson', 'neilsen', 2), 7)

        # supplied q-gram tests
        self.assertEqual(manhattan(QGrams(''), QGrams('')), 0)
        self.assertEqual(manhattan(QGrams('nelson'), QGrams('')), 7)
        self.assertEqual(manhattan(QGrams(''), QGrams('neilsen')), 8)
        self.assertAlmostEqual(manhattan(QGrams('nelson'),
                                         QGrams('neilsen')), 7)

        # non-q-gram tests
        self.assertEqual(manhattan('', '', 0), 0)
        self.assertEqual(manhattan('the quick', '', 0), 2)
        self.assertEqual(manhattan('', 'the quick', 0), 2)
        self.assertAlmostEqual(manhattan(NONQ_FROM, NONQ_TO, 0), 8)
        self.assertAlmostEqual(manhattan(NONQ_TO, NONQ_FROM, 0), 8)

    def test_sim_manhattan(self):
        """Test abydos.distance.sim_manhattan.

        Exercises the default call, an explicit third argument of 2,
        pre-built QGrams operands, and a third argument of 0.
        """
        # default arguments
        self.assertEqual(sim_manhattan('', ''), 1)
        self.assertEqual(sim_manhattan('nelson', ''), 0)
        self.assertEqual(sim_manhattan('', 'neilsen'), 0)
        self.assertAlmostEqual(sim_manhattan('nelson', 'neilsen'), 8/15)

        # explicit third argument of 2 (expected to match the defaults)
        self.assertEqual(sim_manhattan('', '', 2), 1)
        self.assertEqual(sim_manhattan('nelson', '', 2), 0)
        self.assertEqual(sim_manhattan('', 'neilsen', 2), 0)
        self.assertAlmostEqual(sim_manhattan('nelson', 'neilsen', 2), 8/15)

        # supplied q-gram tests
        self.assertEqual(sim_manhattan(QGrams(''), QGrams('')), 1)
        self.assertEqual(sim_manhattan(QGrams('nelson'), QGrams('')), 0)
        self.assertEqual(sim_manhattan(QGrams(''), QGrams('neilsen')), 0)
        self.assertAlmostEqual(sim_manhattan(QGrams('nelson'),
                                             QGrams('neilsen')), 8/15)

        # non-q-gram tests
        self.assertEqual(sim_manhattan('', '', 0), 1)
        self.assertEqual(sim_manhattan('the quick', '', 0), 0)
        self.assertEqual(sim_manhattan('', 'the quick', 0), 0)
        self.assertAlmostEqual(sim_manhattan(NONQ_FROM, NONQ_TO, 0), 1/2)
        self.assertAlmostEqual(sim_manhattan(NONQ_TO, NONQ_FROM, 0), 1/2)

    def test_dist_manhattan(self):
        """Test abydos.distance.dist_manhattan.

        Each expected value is one minus the value asserted for the same
        inputs in test_sim_manhattan.
        """
        # default arguments
        self.assertEqual(dist_manhattan('', ''), 0)
        self.assertEqual(dist_manhattan('nelson', ''), 1)
        self.assertEqual(dist_manhattan('', 'neilsen'), 1)
        self.assertAlmostEqual(dist_manhattan('nelson', 'neilsen'), 7/15)

        # explicit third argument of 2 (expected to match the defaults)
        self.assertEqual(dist_manhattan('', '', 2), 0)
        self.assertEqual(dist_manhattan('nelson', '', 2), 1)
        self.assertEqual(dist_manhattan('', 'neilsen', 2), 1)
        self.assertAlmostEqual(dist_manhattan('nelson', 'neilsen', 2), 7/15)

        # supplied q-gram tests
        self.assertEqual(dist_manhattan(QGrams(''), QGrams('')), 0)
        self.assertEqual(dist_manhattan(QGrams('nelson'), QGrams('')), 1)
        self.assertEqual(dist_manhattan(QGrams(''), QGrams('neilsen')), 1)
        self.assertAlmostEqual(dist_manhattan(QGrams('nelson'),
                                              QGrams('neilsen')), 7/15)

        # non-q-gram tests
        self.assertEqual(dist_manhattan('', '', 0), 0)
        self.assertEqual(dist_manhattan('the quick', '', 0), 1)
        self.assertEqual(dist_manhattan('', 'the quick', 0), 1)
        self.assertAlmostEqual(dist_manhattan(NONQ_FROM, NONQ_TO, 0), 1/2)
        self.assertAlmostEqual(dist_manhattan(NONQ_TO, NONQ_FROM, 0), 1/2)
903
904
905
class EuclideanTestCases(unittest.TestCase):
    """Test Euclidean functions.

    abydos.distance.euclidean, sim_euclidean & .dist_euclidean
    """

    def test_euclidean(self):
        """Test abydos.distance.euclidean.

        Expected values that are square roots are irrational, so they are
        checked with assertAlmostEqual instead of exact float equality;
        only the exact zero results use assertEqual.
        """
        # default arguments
        self.assertEqual(euclidean('', ''), 0)
        self.assertAlmostEqual(euclidean('nelson', ''), 7**0.5)
        self.assertAlmostEqual(euclidean('', 'neilsen'), 8**0.5)
        self.assertAlmostEqual(euclidean('nelson', 'neilsen'), 7**0.5)

        # explicit third argument of 2 (expected to match the defaults)
        self.assertEqual(euclidean('', '', 2), 0)
        self.assertAlmostEqual(euclidean('nelson', '', 2), 7**0.5)
        self.assertAlmostEqual(euclidean('', 'neilsen', 2), 8**0.5)
        self.assertAlmostEqual(euclidean('nelson', 'neilsen', 2), 7**0.5)

        # supplied q-gram tests
        self.assertEqual(euclidean(QGrams(''), QGrams('')), 0)
        self.assertAlmostEqual(euclidean(QGrams('nelson'), QGrams('')),
                               7**0.5)
        self.assertAlmostEqual(euclidean(QGrams(''), QGrams('neilsen')),
                               8**0.5)
        self.assertAlmostEqual(euclidean(QGrams('nelson'),
                                         QGrams('neilsen')), 7**0.5)

        # non-q-gram tests
        self.assertEqual(euclidean('', '', 0), 0)
        self.assertAlmostEqual(euclidean('the quick', '', 0), 2**0.5)
        self.assertAlmostEqual(euclidean('', 'the quick', 0), 2**0.5)
        self.assertAlmostEqual(euclidean(NONQ_FROM, NONQ_TO, 0), 8**0.5)
        self.assertAlmostEqual(euclidean(NONQ_TO, NONQ_FROM, 0), 8**0.5)

    def test_sim_euclidean(self):
        """Test abydos.distance.sim_euclidean.

        Exercises the default call, an explicit third argument of 2,
        pre-built QGrams operands, and a third argument of 0.
        """
        # default arguments
        self.assertEqual(sim_euclidean('', ''), 1)
        self.assertEqual(sim_euclidean('nelson', ''), 0)
        self.assertEqual(sim_euclidean('', 'neilsen'), 0)
        self.assertAlmostEqual(sim_euclidean('nelson', 'neilsen'),
                               1-7**0.5/23**0.5)

        # explicit third argument of 2 (expected to match the defaults)
        self.assertEqual(sim_euclidean('', '', 2), 1)
        self.assertEqual(sim_euclidean('nelson', '', 2), 0)
        self.assertEqual(sim_euclidean('', 'neilsen', 2), 0)
        self.assertAlmostEqual(sim_euclidean('nelson', 'neilsen', 2),
                               1-7**0.5/23**0.5)

        # supplied q-gram tests
        self.assertEqual(sim_euclidean(QGrams(''), QGrams('')), 1)
        self.assertEqual(sim_euclidean(QGrams('nelson'), QGrams('')), 0)
        self.assertEqual(sim_euclidean(QGrams(''), QGrams('neilsen')), 0)
        self.assertAlmostEqual(sim_euclidean(QGrams('nelson'),
                                             QGrams('neilsen')),
                               1-7**0.5/23**0.5)

        # non-q-gram tests
        self.assertEqual(sim_euclidean('', '', 0), 1)
        self.assertEqual(sim_euclidean('the quick', '', 0), 0)
        self.assertEqual(sim_euclidean('', 'the quick', 0), 0)
        self.assertAlmostEqual(sim_euclidean(NONQ_FROM, NONQ_TO, 0),
                               1-8**0.5/24**0.5)
        self.assertAlmostEqual(sim_euclidean(NONQ_TO, NONQ_FROM, 0),
                               1-8**0.5/24**0.5)

    def test_dist_euclidean(self):
        """Test abydos.distance.dist_euclidean.

        Each expected value is one minus the value asserted for the same
        inputs in test_sim_euclidean.
        """
        # default arguments
        self.assertEqual(dist_euclidean('', ''), 0)
        self.assertEqual(dist_euclidean('nelson', ''), 1)
        self.assertEqual(dist_euclidean('', 'neilsen'), 1)
        self.assertAlmostEqual(dist_euclidean('nelson', 'neilsen'),
                               7**0.5 / 23**0.5)

        # explicit third argument of 2 (expected to match the defaults)
        self.assertEqual(dist_euclidean('', '', 2), 0)
        self.assertEqual(dist_euclidean('nelson', '', 2), 1)
        self.assertEqual(dist_euclidean('', 'neilsen', 2), 1)
        self.assertAlmostEqual(dist_euclidean('nelson', 'neilsen', 2),
                               7**0.5 / 23**0.5)

        # supplied q-gram tests
        self.assertEqual(dist_euclidean(QGrams(''), QGrams('')), 0)
        self.assertEqual(dist_euclidean(QGrams('nelson'), QGrams('')), 1)
        self.assertEqual(dist_euclidean(QGrams(''), QGrams('neilsen')), 1)
        self.assertAlmostEqual(dist_euclidean(QGrams('nelson'),
                                              QGrams('neilsen')),
                               7**0.5 / 23**0.5)

        # non-q-gram tests
        self.assertEqual(dist_euclidean('', '', 0), 0)
        self.assertEqual(dist_euclidean('the quick', '', 0), 1)
        self.assertEqual(dist_euclidean('', 'the quick', 0), 1)
        self.assertAlmostEqual(dist_euclidean(NONQ_FROM, NONQ_TO, 0),
                               8**0.5/24**0.5)
        self.assertAlmostEqual(dist_euclidean(NONQ_TO, NONQ_FROM, 0),
                               8**0.5/24**0.5)
998
999
1000
class ChebyshevTestCases(unittest.TestCase):
    """Test Chebyshev functions.

    abydos.distance.chebyshev
    """

    def test_chebyshev(self):
        """Test abydos.distance.chebyshev.

        Exercises the default call, an explicit third argument of 2,
        pre-built QGrams operands, and a third argument of 0. Every
        non-empty comparison here is expected to yield 1.
        """
        # default arguments
        self.assertEqual(chebyshev('', ''), 0)
        self.assertEqual(chebyshev('nelson', ''), 1)
        self.assertEqual(chebyshev('', 'neilsen'), 1)
        self.assertEqual(chebyshev('nelson', 'neilsen'), 1)

        # explicit third argument of 2 (expected to match the defaults)
        self.assertEqual(chebyshev('', '', 2), 0)
        self.assertEqual(chebyshev('nelson', '', 2), 1)
        self.assertEqual(chebyshev('', 'neilsen', 2), 1)
        self.assertAlmostEqual(chebyshev('nelson', 'neilsen', 2), 1)

        # supplied q-gram tests
        self.assertEqual(chebyshev(QGrams(''), QGrams('')), 0)
        self.assertEqual(chebyshev(QGrams('nelson'), QGrams('')), 1)
        self.assertEqual(chebyshev(QGrams(''), QGrams('neilsen')), 1)
        self.assertAlmostEqual(chebyshev(QGrams('nelson'),
                                         QGrams('neilsen')), 1)

        # non-q-gram tests
        self.assertEqual(chebyshev('', '', 0), 0)
        self.assertEqual(chebyshev('the quick', '', 0), 1)
        self.assertEqual(chebyshev('', 'the quick', 0), 1)
        self.assertAlmostEqual(chebyshev(NONQ_FROM, NONQ_TO, 0), 1)
        self.assertAlmostEqual(chebyshev(NONQ_TO, NONQ_FROM, 0), 1)
1031
1032
1033
class CosineSimilarityTestCases(unittest.TestCase):
    """Test cosine similarity functions.

    abydos.distance.sim_cosine & .dist_cosine
    """

    def test_sim_cosine(self):
        """Test abydos.distance.sim_cosine.

        Exercises the default call, an explicit third argument of 2,
        pre-built QGrams operands, and a third argument of 0.
        """
        # default arguments
        self.assertEqual(sim_cosine('', ''), 1)
        self.assertEqual(sim_cosine('nelson', ''), 0)
        self.assertEqual(sim_cosine('', 'neilsen'), 0)
        self.assertAlmostEqual(sim_cosine('nelson', 'neilsen'),
                               4/math.sqrt(7*8))

        # explicit third argument of 2 (expected to match the defaults)
        self.assertEqual(sim_cosine('', '', 2), 1)
        self.assertEqual(sim_cosine('nelson', '', 2), 0)
        self.assertEqual(sim_cosine('', 'neilsen', 2), 0)
        self.assertAlmostEqual(sim_cosine('nelson', 'neilsen', 2),
                               4/math.sqrt(7*8))

        # supplied q-gram tests
        self.assertEqual(sim_cosine(QGrams(''), QGrams('')), 1)
        self.assertEqual(sim_cosine(QGrams('nelson'), QGrams('')), 0)
        self.assertEqual(sim_cosine(QGrams(''), QGrams('neilsen')), 0)
        self.assertAlmostEqual(sim_cosine(QGrams('nelson'), QGrams('neilsen')),
                               4/math.sqrt(7*8))

        # non-q-gram tests
        self.assertEqual(sim_cosine('', '', 0), 1)
        self.assertEqual(sim_cosine('the quick', '', 0), 0)
        self.assertEqual(sim_cosine('', 'the quick', 0), 0)
        self.assertAlmostEqual(sim_cosine(NONQ_FROM, NONQ_TO, 0),
                               4/math.sqrt(9*7))
        self.assertAlmostEqual(sim_cosine(NONQ_TO, NONQ_FROM, 0),
                               4/math.sqrt(9*7))

    def test_dist_cosine(self):
        """Test abydos.distance.dist_cosine.

        Each expected value is one minus the value asserted for the same
        inputs in test_sim_cosine.
        """
        # default arguments
        self.assertEqual(dist_cosine('', ''), 0)
        self.assertEqual(dist_cosine('nelson', ''), 1)
        self.assertEqual(dist_cosine('', 'neilsen'), 1)
        self.assertAlmostEqual(dist_cosine('nelson', 'neilsen'),
                               1-(4/math.sqrt(7*8)))

        # explicit third argument of 2 (expected to match the defaults)
        self.assertEqual(dist_cosine('', '', 2), 0)
        self.assertEqual(dist_cosine('nelson', '', 2), 1)
        self.assertEqual(dist_cosine('', 'neilsen', 2), 1)
        self.assertAlmostEqual(dist_cosine('nelson', 'neilsen', 2),
                               1-(4/math.sqrt(7*8)))

        # supplied q-gram tests
        self.assertEqual(dist_cosine(QGrams(''), QGrams('')), 0)
        self.assertEqual(dist_cosine(QGrams('nelson'), QGrams('')), 1)
        self.assertEqual(dist_cosine(QGrams(''), QGrams('neilsen')), 1)
        self.assertAlmostEqual(dist_cosine(QGrams('nelson'),
                                           QGrams('neilsen')),
                               1-(4/math.sqrt(7*8)))

        # non-q-gram tests
        self.assertEqual(dist_cosine('', '', 0), 0)
        self.assertEqual(dist_cosine('the quick', '', 0), 1)
        self.assertEqual(dist_cosine('', 'the quick', 0), 1)
        self.assertAlmostEqual(dist_cosine(NONQ_FROM, NONQ_TO, 0),
                               1-4/math.sqrt(9*7))
        self.assertAlmostEqual(dist_cosine(NONQ_TO, NONQ_FROM, 0),
                               1-4/math.sqrt(9*7))
1099
1100
1101
class JaroWinklerTestCases(unittest.TestCase):
    """Test Jaro(-Winkler) functions.

    abydos.distance.sim_strcmp95, .dist_strcmp95, .sim_jaro_winkler, &
    .dist_jaro_winkler
    """

    def test_sim_strcmp95(self):
        """Test abydos.distance.sim_strcmp95.

        Covers empty and identical operands, classic name pairs, the
        third positional argument set to True (long_strings, per the
        section comment), and inputs for which no boost is applied.
        """
        # empty and identical operands
        self.assertEqual(sim_strcmp95('', ''), 1)
        self.assertEqual(sim_strcmp95('MARTHA', ''), 0)
        self.assertEqual(sim_strcmp95('', 'MARTHA'), 0)
        self.assertEqual(sim_strcmp95('MARTHA', 'MARTHA'), 1)

        # classic name pairs
        self.assertAlmostEqual(sim_strcmp95('MARTHA', 'MARHTA'), 0.96111111)
        self.assertAlmostEqual(sim_strcmp95('DWAYNE', 'DUANE'), 0.873)
        self.assertAlmostEqual(sim_strcmp95('DIXON', 'DICKSONX'), 0.839333333)

        # no characters in common
        self.assertAlmostEqual(sim_strcmp95('ABCD', 'EFGH'), 0.0)

        # long_strings = True
        self.assertAlmostEqual(sim_strcmp95('DIXON', 'DICKSONX', True),
                               0.85393939)
        self.assertAlmostEqual(sim_strcmp95('DWAYNE', 'DUANE', True),
                               0.89609090)
        self.assertAlmostEqual(sim_strcmp95('MARTHA', 'MARHTA', True),
                               0.97083333)

        # cover case where we don't boost, etc.
        self.assertAlmostEqual(sim_strcmp95('A', 'ABCDEFGHIJK'), 69/99)
        self.assertAlmostEqual(sim_strcmp95('A', 'ABCDEFGHIJK', True), 69/99)
        self.assertAlmostEqual(sim_strcmp95('d', 'abcdefgh'), 0.708333333)
        self.assertAlmostEqual(sim_strcmp95('d', 'abcdefgh', True),
                               0.708333333)
        self.assertAlmostEqual(sim_strcmp95('1', 'abc1efgh', True),
                               0.708333333)
        self.assertAlmostEqual(sim_strcmp95('12hundredths', '12hundred', True),
                               0.916666667)

    def test_dist_strcmp95(self):
        """Test abydos.distance.dist_strcmp95.

        Each expected value is one minus the value asserted for the same
        inputs in test_sim_strcmp95.
        """
        # empty and identical operands
        self.assertEqual(dist_strcmp95('', ''), 0)
        self.assertEqual(dist_strcmp95('MARTHA', ''), 1)
        self.assertEqual(dist_strcmp95('', 'MARTHA'), 1)
        self.assertEqual(dist_strcmp95('MARTHA', 'MARTHA'), 0)

        # classic name pairs
        self.assertAlmostEqual(dist_strcmp95('MARTHA', 'MARHTA'), 0.03888888)
        self.assertAlmostEqual(dist_strcmp95('DWAYNE', 'DUANE'), 0.127)
        self.assertAlmostEqual(dist_strcmp95('DIXON', 'DICKSONX'), 0.160666666)

        # no characters in common
        self.assertAlmostEqual(dist_strcmp95('ABCD', 'EFGH'), 1.0)

    def test_sim_jaro_winkler(self):
        """Test abydos.distance.sim_jaro_winkler.

        Covers both ``mode`` values ('jaro' and 'winkler'), the ValueError
        raised for the out-of-range ``boost_threshold`` and
        ``scaling_factor`` values used below, and the ``long_strings``
        option.
        """
        # empty and identical operands, in both modes
        self.assertEqual(sim_jaro_winkler('', '', mode='jaro'), 1)
        self.assertEqual(sim_jaro_winkler('', '', mode='winkler'), 1)
        self.assertEqual(sim_jaro_winkler('MARTHA', '', mode='jaro'), 0)
        self.assertEqual(sim_jaro_winkler('MARTHA', '', mode='winkler'), 0)
        self.assertEqual(sim_jaro_winkler('', 'MARHTA', mode='jaro'), 0)
        self.assertEqual(sim_jaro_winkler('', 'MARHTA', mode='winkler'), 0)
        self.assertEqual(sim_jaro_winkler('MARTHA', 'MARTHA', mode='jaro'), 1)
        self.assertEqual(sim_jaro_winkler('MARTHA', 'MARTHA', mode='winkler'),
                         1)

        # https://en.wikipedia.org/wiki/Jaro-Winkler_distance
        self.assertAlmostEqual(sim_jaro_winkler('MARTHA', 'MARHTA',
                                                mode='jaro'), 0.94444444)
        self.assertAlmostEqual(sim_jaro_winkler('MARTHA', 'MARHTA',
                                                mode='winkler'), 0.96111111)
        self.assertAlmostEqual(sim_jaro_winkler('DWAYNE', 'DUANE',
                                                mode='jaro'), 0.82222222)
        self.assertAlmostEqual(sim_jaro_winkler('DWAYNE', 'DUANE',
                                                mode='winkler'), 0.84)
        self.assertAlmostEqual(sim_jaro_winkler('DIXON', 'DICKSONX',
                                                mode='jaro'), 0.76666666)
        self.assertAlmostEqual(sim_jaro_winkler('DIXON', 'DICKSONX',
                                                mode='winkler'), 0.81333333)

        # invalid parameter values must be rejected with ValueError
        self.assertRaises(ValueError, sim_jaro_winkler, 'abcd', 'dcba',
                          boost_threshold=2)
        self.assertRaises(ValueError, sim_jaro_winkler, 'abcd', 'dcba',
                          boost_threshold=-1)
        self.assertRaises(ValueError, sim_jaro_winkler, 'abcd', 'dcba',
                          scaling_factor=0.3)
        self.assertRaises(ValueError, sim_jaro_winkler, 'abcd', 'dcba',
                          scaling_factor=-1)

        # no characters in common
        self.assertAlmostEqual(sim_jaro_winkler('ABCD', 'EFGH'), 0.0)

        # long_strings = True (applies only to Jaro-Winkler, not Jaro)
        self.assertEqual(sim_jaro_winkler('ABCD', 'EFGH', long_strings=True),
                         sim_jaro_winkler('ABCD', 'EFGH'))
        self.assertEqual(sim_jaro_winkler('DIXON', 'DICKSONX', mode='jaro',
                                          long_strings=True),
                         sim_jaro_winkler('DIXON', 'DICKSONX',
                                          mode='jaro'))
        self.assertAlmostEqual(sim_jaro_winkler('DIXON', 'DICKSONX',
                                                mode='winkler',
                                                long_strings=True), 0.83030303)
        self.assertAlmostEqual(sim_jaro_winkler('MARTHA', 'MARHTA',
                                                mode='winkler',
                                                long_strings=True), 0.97083333)

    def test_dist_jaro_winkler(self):
        """Test abydos.distance.dist_jaro_winkler.

        Each expected value is one minus the value asserted for the same
        inputs in test_sim_jaro_winkler; the same invalid parameter values
        must raise ValueError.
        """
        # empty and identical operands, in both modes
        self.assertEqual(dist_jaro_winkler('', '', mode='jaro'), 0)
        self.assertEqual(dist_jaro_winkler('', '', mode='winkler'), 0)
        self.assertEqual(dist_jaro_winkler('MARTHA', '', mode='jaro'), 1)
        self.assertEqual(dist_jaro_winkler('MARTHA', '', mode='winkler'), 1)
        self.assertEqual(dist_jaro_winkler('', 'MARHTA', mode='jaro'), 1)
        self.assertEqual(dist_jaro_winkler('', 'MARHTA', mode='winkler'), 1)
        self.assertEqual(dist_jaro_winkler('MARTHA', 'MARTHA', mode='jaro'), 0)
        self.assertEqual(dist_jaro_winkler('MARTHA', 'MARTHA', mode='winkler'),
                         0)

        # https://en.wikipedia.org/wiki/Jaro-Winkler_distance
        self.assertAlmostEqual(dist_jaro_winkler('MARTHA', 'MARHTA',
                                                 mode='jaro'), 0.05555555)
        self.assertAlmostEqual(dist_jaro_winkler('MARTHA', 'MARHTA',
                                                 mode='winkler'), 0.03888888)
        self.assertAlmostEqual(dist_jaro_winkler('DWAYNE', 'DUANE',
                                                 mode='jaro'), 0.17777777)
        self.assertAlmostEqual(dist_jaro_winkler('DWAYNE', 'DUANE',
                                                 mode='winkler'), 0.16)
        self.assertAlmostEqual(dist_jaro_winkler('DIXON', 'DICKSONX',
                                                 mode='jaro'), 0.23333333)
        self.assertAlmostEqual(dist_jaro_winkler('DIXON', 'DICKSONX',
                                                 mode='winkler'), 0.18666666)

        # invalid parameter values must be rejected with ValueError
        self.assertRaises(ValueError, dist_jaro_winkler, 'abcd', 'dcba',
                          boost_threshold=2)
        self.assertRaises(ValueError, dist_jaro_winkler, 'abcd', 'dcba',
                          boost_threshold=-1)
        self.assertRaises(ValueError, dist_jaro_winkler, 'abcd', 'dcba',
                          scaling_factor=0.3)
        self.assertRaises(ValueError, dist_jaro_winkler, 'abcd', 'dcba',
                          scaling_factor=-1)

        # no characters in common
        self.assertAlmostEqual(dist_jaro_winkler('ABCD', 'EFGH'), 1.0)
1240
1241
1242
class LcsseqTestCases(unittest.TestCase):
    """Exercise the longest common subsequence (LCSseq) functions.

    abydos.distance.lcsseq, .sim_lcsseq, & .dist_lcsseq
    """

    def test_lcsseq(self):
        """Check the subsequences returned by abydos.distance.lcsseq."""
        # Each row is (src, tar, expected subsequence).
        expectations = (
            ('', '', ''),
            ('A', '', ''),
            ('', 'A', ''),
            ('A', 'A', 'A'),
            ('ABCD', '', ''),
            ('', 'ABCD', ''),
            ('ABCD', 'ABCD', 'ABCD'),
            ('ABCD', 'BC', 'BC'),
            ('ABCD', 'AD', 'AD'),
            ('ABCD', 'AC', 'AC'),
            ('AB', 'CD', ''),
            ('ABC', 'BCD', 'BC'),

            ('DIXON', 'DICKSONX', 'DION'),

            # https://en.wikipedia.org/wiki/Longest_common_subsequence_problem
            ('AGCAT', 'GAC', 'AC'),
            ('XMJYAUZ', 'MZJAWXU', 'MJAU'),

            # https://github.com/jwmerrill/factor/blob/master/basis/lcs/lcs-tests.factor
            ('hell', 'hello', 'hell'),
            ('hello', 'hell', 'hell'),
            ('ell', 'hell', 'ell'),
            ('hell', 'ell', 'ell'),
            ('faxbcd', 'abdef', 'abd'),

            # http://www.unesco.org/culture/languages-atlas/assets/_core/php/qcubed_unit_tests.php
            ('hello world', 'world war 2', 'world'),
            ('foo bar', 'bar foo', 'foo'),
            ('aaa', 'aa', 'aa'),
            ('cc', 'bbbbcccccc', 'cc'),
            ('ccc', 'bcbb', 'c'),
        )
        for src, tar, subseq in expectations:
            self.assertEqual(lcsseq(src, tar), subseq)

    def test_sim_lcsseq(self):
        """Check the scores returned by abydos.distance.sim_lcsseq."""
        # Empty and identical inputs are compared exactly.
        exact_cases = (
            ('', '', 1),
            ('A', '', 0),
            ('', 'A', 0),
            ('A', 'A', 1),
            ('ABCD', '', 0),
            ('', 'ABCD', 0),
            ('ABCD', 'ABCD', 1),
        )
        for src, tar, score in exact_cases:
            self.assertEqual(sim_lcsseq(src, tar), score)

        # Fractional scores are compared with assertAlmostEqual.
        approx_cases = (
            ('ABCD', 'BC', 2/4),
            ('ABCD', 'AD', 2/4),
            ('ABCD', 'AC', 2/4),
            ('AB', 'CD', 0),
            ('ABC', 'BCD', 2/3),

            ('DIXON', 'DICKSONX', 4/8),

            # https://en.wikipedia.org/wiki/Longest_common_subsequence_problem
            ('AGCAT', 'GAC', 2/5),
            ('XMJYAUZ', 'MZJAWXU', 4/7),

            # https://github.com/jwmerrill/factor/blob/master/basis/lcs/lcs-tests.factor
            ('hell', 'hello', 4/5),
            ('hello', 'hell', 4/5),
            ('ell', 'hell', 3/4),
            ('hell', 'ell', 3/4),
            ('faxbcd', 'abdef', 3/6),

            # http://www.unesco.org/culture/languages-atlas/assets/_core/php/qcubed_unit_tests.php
            ('hello world', 'world war 2', 5/11),
            ('foo bar', 'bar foo', 3/7),
            ('aaa', 'aa', 2/3),
            ('cc', 'bbbbcccccc', 2/10),
            ('ccc', 'bcbb', 1/4),
        )
        for src, tar, score in approx_cases:
            self.assertAlmostEqual(sim_lcsseq(src, tar), score)

    def test_dist_lcsseq(self):
        """Check the distances returned by abydos.distance.dist_lcsseq."""
        # Empty and identical inputs are compared exactly.
        exact_cases = (
            ('', '', 0),
            ('A', '', 1),
            ('', 'A', 1),
            ('A', 'A', 0),
            ('ABCD', '', 1),
            ('', 'ABCD', 1),
            ('ABCD', 'ABCD', 0),
        )
        for src, tar, distance in exact_cases:
            self.assertEqual(dist_lcsseq(src, tar), distance)

        # Fractional distances are compared with assertAlmostEqual.
        approx_cases = (
            ('ABCD', 'BC', 2/4),
            ('ABCD', 'AD', 2/4),
            ('ABCD', 'AC', 2/4),
            ('AB', 'CD', 1),
            ('ABC', 'BCD', 1/3),

            ('DIXON', 'DICKSONX', 4/8),

            # https://en.wikipedia.org/wiki/Longest_common_subsequence_problem
            ('AGCAT', 'GAC', 3/5),
            ('XMJYAUZ', 'MZJAWXU', 3/7),

            # https://github.com/jwmerrill/factor/blob/master/basis/lcs/lcs-tests.factor
            ('hell', 'hello', 1/5),
            ('hello', 'hell', 1/5),
            ('ell', 'hell', 1/4),
            ('hell', 'ell', 1/4),
            ('faxbcd', 'abdef', 3/6),

            # http://www.unesco.org/culture/languages-atlas/assets/_core/php/qcubed_unit_tests.php
            ('hello world', 'world war 2', 6/11),
            ('foo bar', 'bar foo', 4/7),
            ('aaa', 'aa', 1/3),
            ('cc', 'bbbbcccccc', 8/10),
            ('ccc', 'bcbb', 3/4),
        )
        for src, tar, distance in approx_cases:
            self.assertAlmostEqual(dist_lcsseq(src, tar), distance)
1352
1353
1354
class LcsstrTestCases(unittest.TestCase):
    """Exercise the longest common substring (LCSstr) functions.

    abydos.distance.lcsstr, .sim_lcsstr, & .dist_lcsstr
    """

    def test_lcsstr(self):
        """Check the substrings returned by abydos.distance.lcsstr."""
        # Long nucleotide sequences for the final case, assembled from
        # adjacent literals so that no source line needs a continuation.
        dna_src = ('TAAGGTCGGCGCGCACGCTGGCGAGTATGGTGCGGAGGCCCTGGA'
                   'GAGGTGAGGCTCCCTCCCCTGCTCCGACCCGGGCTCCTCGCCCGCCCGGACCCAC')
        dna_tar = ('AAGCGCCGCGCAGTCTGGG'
                   'CTCCGCACACTTCTGGTCCAGTCCGACTGAGAAGGAACCACCATGGTGCTGTCTCCCG'
                   'CTGACAAGACCAACATCAAG'
                   'ACTGCCTGGGAAAAGATCGGCAGCCACGGTGGCGAGTATGGCGCCGAGGCCGT')

        # Each row is (src, tar, expected substring).
        expectations = (
            ('', '', ''),
            ('A', '', ''),
            ('', 'A', ''),
            ('A', 'A', 'A'),
            ('ABCD', '', ''),
            ('', 'ABCD', ''),
            ('ABCD', 'ABCD', 'ABCD'),
            ('ABCD', 'BC', 'BC'),
            ('ABCD', 'AD', 'A'),
            ('ABCD', 'AC', 'A'),
            ('AB', 'CD', ''),
            ('ABC', 'BCD', 'BC'),

            ('DIXON', 'DICKSONX', 'DI'),

            # https://en.wikipedia.org/wiki/Longest_common_subsequence_problem
            ('AGCAT', 'GAC', 'A'),
            ('XMJYAUZ', 'MZJAWXU', 'X'),

            # https://github.com/jwmerrill/factor/blob/master/basis/lcs/lcs-tests.factor
            ('hell', 'hello', 'hell'),
            ('hello', 'hell', 'hell'),
            ('ell', 'hell', 'ell'),
            ('hell', 'ell', 'ell'),
            ('faxbcd', 'abdef', 'f'),

            # http://www.unesco.org/culture/languages-atlas/assets/_core/php/qcubed_unit_tests.php
            ('hello world', 'world war 2', 'world'),
            ('foo bar', 'bar foo', 'foo'),
            ('aaa', 'aa', 'aa'),
            ('cc', 'bbbbcccccc', 'cc'),
            ('ccc', 'bcbb', 'c'),

            # http://www.maplesoft.com/support/help/Maple/view.aspx?path=StringTools/LongestCommonSubString
            ('abax', 'bax', 'bax'),
            ('tsaxbaxyz', 'axcaxy', 'axy'),
            ('abcde', 'uvabxycde', 'cde'),
            ('abc', 'xyz', ''),
            (dna_src, dna_tar, 'TGGCGAGTATGG'),
        )
        for src, tar, substring in expectations:
            self.assertEqual(lcsstr(src, tar), substring)

    def test_sim_lcsstr(self):
        """Check the scores returned by abydos.distance.sim_lcsstr."""
        # Empty and identical inputs are compared exactly.
        exact_cases = (
            ('', '', 1),
            ('A', '', 0),
            ('', 'A', 0),
            ('A', 'A', 1),
            ('ABCD', '', 0),
            ('', 'ABCD', 0),
            ('ABCD', 'ABCD', 1),
        )
        for src, tar, score in exact_cases:
            self.assertEqual(sim_lcsstr(src, tar), score)

        # Fractional scores are compared with assertAlmostEqual.
        approx_cases = (
            ('ABCD', 'BC', 2/4),
            ('ABCD', 'AD', 1/4),
            ('ABCD', 'AC', 1/4),
            ('AB', 'CD', 0),
            ('ABC', 'BCD', 2/3),

            ('DIXON', 'DICKSONX', 2/8),

            # https://en.wikipedia.org/wiki/Longest_common_subsequence_problem
            ('AGCAT', 'GAC', 1/5),
            ('XMJYAUZ', 'MZJAWXU', 1/7),

            # https://github.com/jwmerrill/factor/blob/master/basis/lcs/lcs-tests.factor
            ('hell', 'hello', 4/5),
            ('hello', 'hell', 4/5),
            ('ell', 'hell', 3/4),
            ('hell', 'ell', 3/4),
            ('faxbcd', 'abdef', 1/6),

            # http://www.unesco.org/culture/languages-atlas/assets/_core/php/qcubed_unit_tests.php
            ('hello world', 'world war 2', 5/11),
            ('foo bar', 'bar foo', 3/7),
            ('aaa', 'aa', 2/3),
            ('cc', 'bbbbcccccc', 2/10),
            ('ccc', 'bcbb', 1/4),
        )
        for src, tar, score in approx_cases:
            self.assertAlmostEqual(sim_lcsstr(src, tar), score)

    def test_dist_lcsstr(self):
        """Check the distances returned by abydos.distance.dist_lcsstr."""
        # Empty and identical inputs are compared exactly.
        exact_cases = (
            ('', '', 0),
            ('A', '', 1),
            ('', 'A', 1),
            ('A', 'A', 0),
            ('ABCD', '', 1),
            ('', 'ABCD', 1),
            ('ABCD', 'ABCD', 0),
        )
        for src, tar, distance in exact_cases:
            self.assertEqual(dist_lcsstr(src, tar), distance)

        # Fractional distances are compared with assertAlmostEqual.
        approx_cases = (
            ('ABCD', 'BC', 2/4),
            ('ABCD', 'AD', 3/4),
            ('ABCD', 'AC', 3/4),
            ('AB', 'CD', 1),
            ('ABC', 'BCD', 1/3),

            ('DIXON', 'DICKSONX', 6/8),

            # https://en.wikipedia.org/wiki/Longest_common_subsequence_problem
            ('AGCAT', 'GAC', 4/5),
            ('XMJYAUZ', 'MZJAWXU', 6/7),

            # https://github.com/jwmerrill/factor/blob/master/basis/lcs/lcs-tests.factor
            ('hell', 'hello', 1/5),
            ('hello', 'hell', 1/5),
            ('ell', 'hell', 1/4),
            ('hell', 'ell', 1/4),
            ('faxbcd', 'abdef', 5/6),

            # http://www.unesco.org/culture/languages-atlas/assets/_core/php/qcubed_unit_tests.php
            ('hello world', 'world war 2', 6/11),
            ('foo bar', 'bar foo', 4/7),
            ('aaa', 'aa', 1/3),
            ('cc', 'bbbbcccccc', 8/10),
            ('ccc', 'bcbb', 3/4),
        )
        for src, tar, distance in approx_cases:
            self.assertAlmostEqual(dist_lcsstr(src, tar), distance)
1474
1475
1476
class RatcliffObershelpTestCases(unittest.TestCase):
    """Test Ratcliff-Obershelp functions.

    abydos.distance.sim_ratcliff_obershelp, &
    abydos.distance.dist_ratcliff_obershelp
    """

    def test_sim_ratcliff_obershelp(self):
        """Test abydos.distance.sim_ratcliff_obershelp.

        Fixed expectations are checked first; afterwards every word pair read
        from the two corpus files is compared against the ratio computed by
        difflib.SequenceMatcher for the same pair.
        """
        # https://github.com/rockymadden/stringmetric/blob/master/core/src/test/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetricSpec.scala
        self.assertEqual(sim_ratcliff_obershelp('', ''), 1)
        self.assertEqual(sim_ratcliff_obershelp('abc', ''), 0)
        self.assertEqual(sim_ratcliff_obershelp('', 'xyz'), 0)
        self.assertEqual(sim_ratcliff_obershelp('abc', 'abc'), 1)
        self.assertEqual(sim_ratcliff_obershelp('123', '123'), 1)
        self.assertEqual(sim_ratcliff_obershelp('abc', 'xyz'), 0)
        self.assertEqual(sim_ratcliff_obershelp('123', '456'), 0)

        # Each pair is checked in both argument orders.
        symmetric_cases = (
            ('aleksander', 'alexandre', 0.7368421052631579),
            ('pennsylvania', 'pencilvaneya', 0.6666666666666666),
            ('abcefglmn', 'abefglmo', 0.8235294117647058),
        )
        for first, second, score in symmetric_cases:
            self.assertAlmostEqual(sim_ratcliff_obershelp(first, second),
                                   score)
            self.assertAlmostEqual(sim_ratcliff_obershelp(second, first),
                                   score)

        with open(TESTDIR+'/corpora/variantNames.csv') as cav_testset:
            next(cav_testset)  # discard the first line (presumably a header)
            for line in cav_testset:
                fields = line.strip().split(',')
                word1, word2 = fields[0], fields[4]
                self.assertAlmostEqual(sim_ratcliff_obershelp(word1, word2),
                                       SequenceMatcher(None, word1,
                                                       word2).ratio())

        # Only upper-case ASCII letters and the field separator are kept;
        # the set is built once instead of once per character.
        allowed = frozenset('ABCDEFGHIJKLMNOPQRSTUVWXYZ,')
        with open(TESTDIR+'/corpora/wikipediaCommonMisspellings.csv') as missp:
            next(missp)  # discard the first line (presumably a header)
            for line in missp:
                cleaned = ''.join(char for char in line.strip().upper()
                                  if char in allowed)
                word1, word2 = cleaned.split(',')
                self.assertAlmostEqual(sim_ratcliff_obershelp(word1, word2),
                                       SequenceMatcher(None, word1,
                                                       word2).ratio())

    def test_dist_ratcliff_obershelp(self):
        """Test abydos.distance.dist_ratcliff_obershelp."""
        # https://github.com/rockymadden/stringmetric/blob/master/core/src/test/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetricSpec.scala
        self.assertEqual(dist_ratcliff_obershelp('', ''), 0)
        self.assertEqual(dist_ratcliff_obershelp('abc', ''), 1)
        self.assertEqual(dist_ratcliff_obershelp('', 'xyz'), 1)
        self.assertEqual(dist_ratcliff_obershelp('abc', 'abc'), 0)
        self.assertEqual(dist_ratcliff_obershelp('123', '123'), 0)
        self.assertEqual(dist_ratcliff_obershelp('abc', 'xyz'), 1)
        self.assertEqual(dist_ratcliff_obershelp('123', '456'), 1)

        # Each pair is checked in both argument orders.
        symmetric_cases = (
            ('aleksander', 'alexandre', 0.2631578947368421),
            ('pennsylvania', 'pencilvaneya', 0.3333333333333333),
            ('abcefglmn', 'abefglmo', 0.1764705882352941),
        )
        for first, second, distance in symmetric_cases:
            self.assertAlmostEqual(dist_ratcliff_obershelp(first, second),
                                   distance)
            self.assertAlmostEqual(dist_ratcliff_obershelp(second, first),
                                   distance)
1559
1560
1561
class MraTestCases(unittest.TestCase):
    """Exercise the match rating approach (MRA) comparison functions.

    abydos.distance.mra_compare, .sim_mra & .dist_mra
    """

    def test_mra_compare(self):
        """Check the ratings returned by abydos.distance.mra_compare."""
        # Each row is (src, tar, expected rating).
        expectations = (
            ('', '', 6),
            ('a', 'a', 6),
            ('abcdefg', 'abcdefg', 6),
            ('abcdefg', '', 0),
            ('', 'abcdefg', 0),

            # https://en.wikipedia.org/wiki/Match_rating_approach
            ('Byrne', 'Boern', 5),
            ('Smith', 'Smyth', 5),
            ('Catherine', 'Kathryn', 4),

            ('ab', 'abcdefgh', 0),
            ('ab', 'ac', 5),
            ('abcdefik', 'abcdefgh', 3),
            ('xyz', 'abc', 0),
        )
        for src, tar, rating in expectations:
            self.assertEqual(mra_compare(src, tar), rating)

    def test_sim_mra(self):
        """Check the scores returned by abydos.distance.sim_mra."""
        # Each row is (src, tar, expected score); all are compared exactly.
        expectations = (
            ('', '', 1),
            ('a', 'a', 1),
            ('abcdefg', 'abcdefg', 1),
            ('abcdefg', '', 0),
            ('', 'abcdefg', 0),

            # https://en.wikipedia.org/wiki/Match_rating_approach
            ('Byrne', 'Boern', 5/6),
            ('Smith', 'Smyth', 5/6),
            ('Catherine', 'Kathryn', 4/6),

            ('ab', 'abcdefgh', 0),
            ('ab', 'ac', 5/6),
            ('abcdefik', 'abcdefgh', 3/6),
            ('xyz', 'abc', 0),
        )
        for src, tar, score in expectations:
            self.assertEqual(sim_mra(src, tar), score)

    def test_dist_mra(self):
        """Check the distances returned by abydos.distance.dist_mra."""
        exact = self.assertEqual
        approx = self.assertAlmostEqual
        # Each row is (assertion, src, tar, expected distance).
        expectations = (
            (exact, '', '', 0),
            (exact, 'a', 'a', 0),
            (exact, 'abcdefg', 'abcdefg', 0),
            (exact, 'abcdefg', '', 1),
            (exact, '', 'abcdefg', 1),

            # https://en.wikipedia.org/wiki/Match_rating_approach
            (approx, 'Byrne', 'Boern', 1/6),
            (approx, 'Smith', 'Smyth', 1/6),
            (approx, 'Catherine', 'Kathryn', 2/6),

            (exact, 'ab', 'abcdefgh', 1),
            (approx, 'ab', 'ac', 1/6),
            (approx, 'abcdefik', 'abcdefgh', 3/6),
            (exact, 'xyz', 'abc', 1),
        )
        for check, src, tar, distance in expectations:
            check(dist_mra(src, tar), distance)
1620
1621
1622
class CompressionTestCases(unittest.TestCase):
    """Test compression distance functions.

    abydos.distance.dist_compression & .sim_compression
    """

    # Result of ac_train on the space-joined NIALL list; passed as the
    # optional fourth argument in the 'arith' tests below.
    arith_dict = ac_train(' '.join(NIALL))

    def test_dist_compression(self):
        """Test abydos.distance.dist_compression."""
        # Extra positional arguments (compressor and, optionally, the trained
        # arithmetic-coding dictionary) appended to each call.
        untrained = ((), ('bzip2',), ('zlib',), ('arith',), ('rle',),
                     ('bwtrle',))
        with_trained = ((), ('bzip2',), ('zlib',), ('arith',),
                        ('arith', self.arith_dict), ('rle',), ('bwtrle',))

        for extra in with_trained:
            self.assertEqual(dist_compression('', '', *extra), 0)

        for extra in with_trained:
            self.assertGreater(dist_compression('a', '', *extra), 0)

        for extra in untrained:
            self.assertGreater(dist_compression('abcdefg', 'fg', *extra), 0)

    def test_dist_compression_arith(self):
        """Test abydos.distance.dist_compression (arithmetic compression)."""
        # Each row is (src, tar, extra args, expected); every pair is checked
        # in both argument orders.
        cases = (
            ('Niall', 'Neil', ('arith', self.arith_dict), 0.608695652173913),
            ('Niall', 'Neil', ('arith',), 0.6875),
            ('Njáll', 'Njall', ('arith', self.arith_dict), 0.714285714285714),
            ('Njáll', 'Njall', ('arith',), 0.75),
        )
        for src, tar, extra, expected in cases:
            self.assertAlmostEqual(dist_compression(src, tar, *extra),
                                   expected)
            self.assertAlmostEqual(dist_compression(tar, src, *extra),
                                   expected)

    def test_dist_compression_rle(self):
        """Test abydos.distance.dist_compression (RLE & BWT+RLE)."""
        cases = (
            ('abc', 'abc', 'rle', 0),
            ('abc', 'def', 'rle', 1),

            ('abc', 'abc', 'bwtrle', 0),
            ('abc', 'def', 'bwtrle', 0.75),

            ('aaa', 'bbaaa', 'rle', 0.5),
            ('abb', 'bbba', 'rle', 1/3),
            ('banana', 'banane', 'bwtrle', 0.57142857142),
            ('bananas', 'bananen', 'bwtrle', 0.5),
        )
        for src, tar, compressor, expected in cases:
            self.assertAlmostEqual(dist_compression(src, tar, compressor),
                                   expected)

    def test_sim_compression(self):
        """Test abydos.distance.sim_compression."""
        # Extra positional arguments (compressor and, optionally, the trained
        # arithmetic-coding dictionary) appended to each call.
        untrained = ((), ('bzip2',), ('zlib',), ('arith',), ('rle',),
                     ('bwtrle',))
        with_trained = ((), ('bzip2',), ('zlib',), ('arith',),
                        ('arith', self.arith_dict), ('rle',), ('bwtrle',))

        for extra in with_trained:
            self.assertEqual(sim_compression('', '', *extra), 1)

        for extra in with_trained:
            self.assertLess(sim_compression('a', '', *extra), 1)

        for extra in untrained:
            self.assertLess(sim_compression('abcdefg', 'fg', *extra), 1)

    def test_sim_compression_arith(self):
        """Test abydos.distance.sim_compression (arithmetic compression)."""
        # Each row is (src, tar, extra args, expected); every pair is checked
        # in both argument orders.
        cases = (
            ('Niall', 'Neil', ('arith', self.arith_dict), 0.3913043478260869),
            ('Niall', 'Neil', ('arith',), 0.3125),
            ('Njáll', 'Njall', ('arith', self.arith_dict), 0.285714285714285),
            ('Njáll', 'Njall', ('arith',), 0.25),
        )
        for src, tar, extra, expected in cases:
            self.assertAlmostEqual(sim_compression(src, tar, *extra),
                                   expected)
            self.assertAlmostEqual(sim_compression(tar, src, *extra),
                                   expected)

    def test_sim_compression_rle(self):
        """Test abydos.distance.sim_compression (RLE & BWT+RLE)."""
        cases = (
            ('abc', 'abc', 'rle', 1),
            ('abc', 'def', 'rle', 0),

            ('abc', 'abc', 'bwtrle', 1),
            ('abc', 'def', 'bwtrle', 0.25),

            ('aaa', 'bbaaa', 'rle', 0.5),
            ('abb', 'bbba', 'rle', 2/3),
            ('banana', 'banane', 'bwtrle', 0.42857142857),
            ('bananas', 'bananen', 'bwtrle', 0.5),
        )
        for src, tar, compressor, expected in cases:
            self.assertAlmostEqual(sim_compression(src, tar, compressor),
                                   expected)

    def test_lzma(self):
        """Test LZMA-related sim/dist functions.

        If lzma can be imported, the basic dist/sim expectations are checked.
        Then 'lzma' is removed from sys.modules and dist_compression is
        expected to raise ValueError. The removed entry is put back afterwards
        so that later tests see an unmodified sys.modules.
        """
        # A plain import attempt replaces pkgutil.find_loader, which is
        # deprecated in Python 3.12 and removed in 3.14.
        try:
            import lzma  # noqa: F401
        except ImportError:
            lzma_available = False
        else:
            lzma_available = True

        if lzma_available:
            self.assertEqual(dist_compression('', '', 'lzma'), 0)
            self.assertGreater(dist_compression('a', '', 'lzma'), 0)
            self.assertGreater(dist_compression('abcdefg', 'fg', 'lzma'), 0)
            self.assertEqual(sim_compression('', '', 'lzma'), 1)
            self.assertLess(sim_compression('a', '', 'lzma'), 1)
            self.assertLess(sim_compression('abcdefg', 'fg', 'lzma'), 1)

        # pop() with a default cannot raise KeyError when the entry is absent.
        hidden = sys.modules.pop('lzma', None)
        try:
            self.assertRaises(ValueError, dist_compression, 'a', '', 'lzma')
        finally:
            if hidden is not None:
                sys.modules['lzma'] = hidden
1770
1771
1772
class MongeElkanTestCases(unittest.TestCase):
    """Exercise the Monge-Elkan similarity and distance functions.

    abydos.distance.sim_monge_elkan & .dist_monge_elkan
    """

    def test_sim_monge_elkan(self):
        """Check the scores returned by abydos.distance.sim_monge_elkan."""
        exact = self.assertEqual
        approx = self.assertAlmostEqual

        for src, tar, score in (('', '', 1), ('', 'a', 0), ('a', 'a', 1)):
            exact(sim_monge_elkan(src, tar), score)

        # 'Niall' against spelling variants, default (asymmetric) mode.
        asymmetric = (
            ('Neal', 3/4),
            ('Njall', 5/6),
            ('Niel', 3/4),
            ('Nigel', 3/4),
        )
        for variant, score in asymmetric:
            exact(sim_monge_elkan('Niall', variant), score)

        # Same variants with symmetric=True; rows carry their assertion.
        symmetric = (
            (exact, 'Neal', 31/40),
            (exact, 'Njall', 5/6),
            (exact, 'Niel', 31/40),
            (approx, 'Nigel', 17/24),
        )
        for check, variant, score in symmetric:
            check(sim_monge_elkan('Niall', variant, symmetric=True), score)

    def test_dist_monge_elkan(self):
        """Check the distances returned by abydos.distance.dist_monge_elkan."""
        exact = self.assertEqual
        approx = self.assertAlmostEqual

        for src, tar, distance in (('', '', 0), ('', 'a', 1)):
            exact(dist_monge_elkan(src, tar), distance)

        # 'Niall' against spelling variants, default (asymmetric) mode;
        # rows carry their assertion.
        asymmetric = (
            (exact, 'Neal', 1/4),
            (approx, 'Njall', 1/6),
            (exact, 'Niel', 1/4),
            (exact, 'Nigel', 1/4),
        )
        for check, variant, distance in asymmetric:
            check(dist_monge_elkan('Niall', variant), distance)

        # Same variants with symmetric=True.
        symmetric = (
            ('Neal', 9/40),
            ('Njall', 1/6),
            ('Niel', 9/40),
            ('Nigel', 7/24),
        )
        for variant, distance in symmetric:
            approx(dist_monge_elkan('Niall', variant, symmetric=True),
                   distance)
1816
1817
1818
class IdentityTestCases(unittest.TestCase):
    """Test the identity similarity & distance functions.

    abydos.distance.sim_ident & .dist_ident
    """

    def test_sim_ident(self):
        """Test abydos.distance.sim_ident over a table of string pairs."""
        expectations = (
            ('', '', 1),
            ('', 'a', 0),
            ('a', '', 0),
            ('a', 'a', 1),
            ('abcd', 'abcd', 1),
            ('abcd', 'dcba', 0),
            ('abc', 'cba', 0),
        )
        for src, tar, score in expectations:
            self.assertEqual(sim_ident(src, tar), score)

    def test_dist_ident(self):
        """Test abydos.distance.dist_ident over a table of string pairs."""
        expectations = (
            ('', '', 0),
            ('', 'a', 1),
            ('a', '', 1),
            ('a', 'a', 0),
            ('abcd', 'abcd', 0),
            ('abcd', 'dcba', 1),
            ('abc', 'cba', 1),
        )
        for src, tar, score in expectations:
            self.assertEqual(dist_ident(src, tar), score)
1843
1844
1845
def _sim_wikipedia(src, tar):
    """Score a pair of DNA bases with the Wikipedia example matrix.

    The scores are taken from the similarity matrix shown at:
    https://en.wikipedia.org/wiki/Needleman%E2%80%93Wunsch_algorithm

    The lookup is delegated to sim_matrix with symmetric=True and the
    alphabet 'CGAT'.
    """
    base_scores = {
        # matching bases
        ('A', 'A'): 10,
        ('G', 'G'): 7,
        ('C', 'C'): 9,
        ('T', 'T'): 8,
        # differing bases (each pair is listed in one order only)
        ('A', 'G'): -1,
        ('A', 'C'): -3,
        ('A', 'T'): -4,
        ('G', 'C'): -5,
        ('G', 'T'): -3,
        ('C', 'T'): 0,
    }
    return sim_matrix(src, tar, base_scores, symmetric=True,
                      alphabet='CGAT')
1855
1856
1857
def _sim_nw(src, tar):
1858
    """Return 1 if src is tar, otherwise -1."""
1859
    return 2*float(src is tar)-1
1860
1861
1862
class MatrixSimTestCases(unittest.TestCase):
    """Test the matrix-based similarity function.

    abydos.distance.sim_matrix
    """

    def test_sim_matrix(self):
        """Test abydos.distance.sim_matrix."""
        # sim_matrix called with its default arguments
        default_cases = (
            ('', '', 1),
            ('', 'a', 0),
            ('a', '', 0),
            ('a', 'a', 1),
            ('abcd', 'abcd', 1),
            ('abcd', 'dcba', 0),
            ('abc', 'cba', 0),
        )
        for src, tar, score in default_cases:
            self.assertEqual(sim_matrix(src, tar), score)

        # https://en.wikipedia.org/wiki/Needleman%E2%80%93Wunsch_algorithm
        wikipedia_cases = (
            ('A', 'C', -3),
            ('G', 'G', 7),
            ('A', 'A', 10),
            ('T', 'A', -4),
            ('T', 'C', 0),
            ('A', 'G', -1),
            ('C', 'T', 0),
        )
        for src, tar, score in wikipedia_cases:
            self.assertEqual(_sim_wikipedia(src, tar), score)

        # each of these pairs must raise ValueError with alphabet='ab'
        for src, tar in (('abc', 'cba'), ('abc', 'ba'), ('ab', 'cba')):
            with self.assertRaises(ValueError):
                sim_matrix(src, tar, alphabet='ab')
1890
1891
1892
class NeedlemanWunschTestCases(unittest.TestCase):
    """Test the Needleman-Wunsch alignment score.

    abydos.distance.needleman_wunsch
    """

    def test_needleman_wunsch(self):
        """Test abydos.distance.needleman_wunsch."""
        self.assertEqual(needleman_wunsch('', ''), 0)

        scored = (
            # https://en.wikipedia.org/wiki/Needleman%E2%80%93Wunsch_algorithm
            ('GATTACA', 'GCATGCU', 1, _sim_nw, 0),
            ('AGACTAGTTAC', 'CGAGACGT', 5, _sim_wikipedia, 16),
            # checked against http://ds9a.nl/nwunsch/
            # (mismatch=1, gap=5, skew=5)
            ('CGATATCAG', 'TGACGSTGC', 5, _sim_nw, -5),
            ('AGACTAGTTAC', 'TGACGSTGC', 5, _sim_nw, -7),
            ('AGACTAGTTAC', 'CGAGACGT', 5, _sim_nw, -15),
        )
        for src, tar, gap, sim_func, score in scored:
            self.assertEqual(needleman_wunsch(src, tar, gap, sim_func), score)

    def test_needleman_wunsch_nialls(self):
        """Test abydos.distance.needleman_wunsch (Nialls set)."""
        # checked against http://ds9a.nl/nwunsch/ (mismatch=1, gap=2, skew=2)
        expected = (5, 0, -2, 3, 1, 1, -2, -2, -1, -3, -3, -5, -3, -7, -7,
                    -19)
        # every NIALL entry is aligned against the first entry
        for idx, name in enumerate(NIALL):
            self.assertEqual(needleman_wunsch(NIALL[0], name, 2, _sim_nw),
                             expected[idx])
1923
1924
1925
class SmithWatermanTestCases(unittest.TestCase):
    """Test the Smith-Waterman alignment score.

    abydos.distance.smith_waterman
    """

    def test_smith_waterman(self):
        """Test abydos.distance.smith_waterman."""
        self.assertEqual(smith_waterman('', ''), 0)

        scored = (
            # https://en.wikipedia.org/wiki/Needleman%E2%80%93Wunsch_algorithm
            ('GATTACA', 'GCATGCU', 1, _sim_nw, 0),
            ('AGACTAGTTAC', 'CGAGACGT', 5, _sim_wikipedia, 26),

            ('CGATATCAG', 'TGACGSTGC', 5, _sim_nw, 0),
            ('AGACTAGTTAC', 'TGACGSTGC', 5, _sim_nw, 1),
            ('AGACTAGTTAC', 'CGAGACGT', 5, _sim_nw, 0),
        )
        for src, tar, gap, sim_func, score in scored:
            self.assertEqual(smith_waterman(src, tar, gap, sim_func), score)

    def test_smith_waterman_nialls(self):
        """Test abydos.distance.smith_waterman (Nialls set)."""
        expected = (5, 1, 1, 3, 2, 1, 1, 0, 0, 1, 1, 2, 2, 1, 0, 0)
        # every NIALL entry is aligned against the first entry
        for idx, name in enumerate(NIALL):
            self.assertEqual(smith_waterman(NIALL[0], name, 2, _sim_nw),
                             expected[idx])
1954
1955
1956
class GotohTestCases(unittest.TestCase):
    """Test the Gotoh alignment score.

    abydos.distance.gotoh
    """

    def test_gotoh(self):
        """Test abydos.distance.gotoh.

        Each tabulated case is scored twice. With the fourth argument equal
        to the third, the result must equal the listed score. With the
        lowered fourth argument, the result must be no smaller than
        needleman_wunsch for the same strings, third argument and
        similarity function.
        """
        self.assertEqual(gotoh('', ''), 0)

        cases = (
            # https://en.wikipedia.org/wiki/Needleman%E2%80%93Wunsch_algorithm
            ('GATTACA', 'GCATGCU', 1, 0.5, _sim_nw, 0),
            ('AGACTAGTTAC', 'CGAGACGT', 5, 2, _sim_wikipedia, 16),
            # checked against http://ds9a.nl/nwunsch/
            # (mismatch=1, gap=5, skew=5)
            ('CGATATCAG', 'TGACGSTGC', 5, 2, _sim_nw, -5),
            ('AGACTAGTTAC', 'TGACGSTGC', 5, 2, _sim_nw, -7),
            ('AGACTAGTTAC', 'CGAGACGT', 5, 2, _sim_nw, -15),
        )
        for src, tar, gap, lowered, sim_func, score in cases:
            self.assertEqual(gotoh(src, tar, gap, gap, sim_func), score)
            self.assertGreaterEqual(
                gotoh(src, tar, gap, lowered, sim_func),
                needleman_wunsch(src, tar, gap, sim_func))

    def test_gotoh_nialls(self):
        """Test abydos.distance.gotoh (Nialls set)."""
        # checked against http://ds9a.nl/nwunsch/ (mismatch=1, gap=2, skew=2)
        scores_2_2 = (5, 0, -2, 3, 1, 1, -2, -2, -1, -3, -3, -5, -3, -7, -7,
                      -19)
        for idx, name in enumerate(NIALL):
            self.assertEqual(gotoh(NIALL[0], name, 2, 2, _sim_nw),
                             scores_2_2[idx])

        scores_2_1 = (5, 0, -2, 3, 1, 1, -2, -2, -1, -2, -3, -3, -2, -6, -6,
                      -8)
        for idx, name in enumerate(NIALL):
            self.assertEqual(gotoh(NIALL[0], name, 2, 1, _sim_nw),
                             scores_2_1[idx])
            # with fourth argument 0.5 the score must not fall below
            # the needleman_wunsch score for the same pair
            self.assertGreaterEqual(
                gotoh(NIALL[0], name, 2, 0.5, _sim_nw),
                needleman_wunsch(NIALL[0], name, 2, _sim_nw))
2008
2009
2010
class LengthTestCases(unittest.TestCase):
2011
    """Test length similarity functions.
2012
2013
    abydos.distance.sim_length & .dist_length
2014
    """
2015
2016
    def test_sim_ident(self):
2017
        """Test abydos.distance.sim_length."""
2018
        self.assertEqual(sim_length('', ''), 1)
2019
        self.assertEqual(sim_length('', 'a'), 0)
2020
        self.assertEqual(sim_length('a', ''), 0)
2021
        self.assertEqual(sim_length('a', 'a'), 1)
2022
        self.assertEqual(sim_length('abcd', 'abcd'), 1)
2023
        self.assertEqual(sim_length('abcd', 'dcba'), 1)
2024
        self.assertEqual(sim_length('abc', 'cba'), 1)
2025
        self.assertEqual(sim_length('abc', 'dcba'), 0.75)
2026
        self.assertEqual(sim_length('abcd', 'cba'), 0.75)
2027
        self.assertEqual(sim_length('ab', 'dcba'), 0.5)
2028
        self.assertEqual(sim_length('abcd', 'ba'), 0.5)
2029
2030
    def test_dist_ident(self):
2031
        """Test abydos.distance.dist_length."""
2032
        self.assertEqual(dist_length('', ''), 0)
2033
        self.assertEqual(dist_length('', 'a'), 1)
2034
        self.assertEqual(dist_length('a', ''), 1)
2035
        self.assertEqual(dist_length('a', 'a'), 0)
2036
        self.assertEqual(dist_length('abcd', 'abcd'), 0)
2037
        self.assertEqual(dist_length('abcd', 'dcba'), 0)
2038
        self.assertEqual(dist_length('abc', 'cba'), 0)
2039
        self.assertEqual(dist_length('abc', 'dcba'), 0.25)
2040
        self.assertEqual(dist_length('abcd', 'cba'), 0.25)
2041
        self.assertEqual(dist_length('ab', 'dcba'), 0.5)
2042
        self.assertEqual(dist_length('abcd', 'ba'), 0.5)
2043
2044
2045
class PrefixTestCases(unittest.TestCase):
    """Test the prefix similarity & distance functions.

    abydos.distance.sim_prefix & .dist_prefix
    """

    def test_sim_prefix(self):
        """Test abydos.distance.sim_prefix.

        Each row names the assertion to use (exact or approximate), the
        two input strings and the expected similarity.
        """
        exact, close = self.assertEqual, self.assertAlmostEqual
        table = (
            (exact, '', '', 1),
            (exact, 'a', '', 0),
            (exact, '', 'a', 0),
            (exact, 'a', 'a', 1),
            (exact, 'ax', 'a', 1),
            (exact, 'axx', 'a', 1),
            (exact, 'ax', 'ay', 1/2),
            (exact, 'a', 'ay', 1),
            (exact, 'a', 'ayy', 1),
            (exact, 'ax', 'ay', 1/2),
            (exact, 'a', 'y', 0),
            (exact, 'y', 'a', 0),
            (exact, 'aaax', 'aaa', 1),
            (close, 'axxx', 'aaa', 1/3),
            (exact, 'aaxx', 'aayy', 1/2),
            (exact, 'xxaa', 'yyaa', 0),
            (close, 'aaxxx', 'aay', 2/3),
            (exact, 'aaxxxx', 'aayyy', 2/5),
            (exact, 'xa', 'a', 0),
            (exact, 'xxa', 'a', 0),
            (exact, 'xa', 'ya', 0),
            (exact, 'a', 'ya', 0),
            (exact, 'a', 'yya', 0),
            (exact, 'xa', 'ya', 0),
            (exact, 'xaaa', 'aaa', 0),
            (exact, 'xxxa', 'aaa', 0),
            (exact, 'xxxaa', 'yaa', 0),
            (exact, 'xxxxaa', 'yyyaa', 0),
        )
        for check, src, tar, score in table:
            check(sim_prefix(src, tar), score)

    def test_dist_prefix(self):
        """Test abydos.distance.dist_prefix.

        Each row names the assertion to use (exact or approximate), the
        two input strings and the expected distance.
        """
        exact, close = self.assertEqual, self.assertAlmostEqual
        table = (
            (exact, '', '', 0),
            (exact, 'a', '', 1),
            (exact, '', 'a', 1),
            (exact, 'a', 'a', 0),
            (exact, 'ax', 'a', 0),
            (exact, 'axx', 'a', 0),
            (exact, 'ax', 'ay', 1/2),
            (exact, 'a', 'ay', 0),
            (exact, 'a', 'ayy', 0),
            (exact, 'ax', 'ay', 1/2),
            (exact, 'a', 'y', 1),
            (exact, 'y', 'a', 1),
            (exact, 'aaax', 'aaa', 0),
            (close, 'axxx', 'aaa', 2/3),
            (exact, 'aaxx', 'aayy', 1/2),
            (exact, 'xxaa', 'yyaa', 1),
            (close, 'aaxxx', 'aay', 1/3),
            (exact, 'aaxxxx', 'aayyy', 3/5),
            (exact, 'xa', 'a', 1),
            (exact, 'xxa', 'a', 1),
            (exact, 'xa', 'ya', 1),
            (exact, 'a', 'ya', 1),
            (exact, 'a', 'yya', 1),
            (exact, 'xa', 'ya', 1),
            (exact, 'xaaa', 'aaa', 1),
            (exact, 'xxxa', 'aaa', 1),
            (exact, 'xxxaa', 'yaa', 1),
            (exact, 'xxxxaa', 'yyyaa', 1),
        )
        for check, src, tar, score in table:
            check(dist_prefix(src, tar), score)
2112
2113
2114
class SuffixTestCases(unittest.TestCase):
    """Test the suffix similarity & distance functions.

    abydos.distance.sim_suffix & .dist_suffix
    """

    def test_sim_suffix(self):
        """Test abydos.distance.sim_suffix.

        Each row names the assertion to use (exact or approximate), the
        two input strings and the expected similarity.
        """
        exact, close = self.assertEqual, self.assertAlmostEqual
        table = (
            (exact, '', '', 1),
            (exact, 'a', '', 0),
            (exact, '', 'a', 0),
            (exact, 'a', 'a', 1),
            (exact, 'ax', 'a', 0),
            (exact, 'axx', 'a', 0),
            (exact, 'ax', 'ay', 0),
            (exact, 'a', 'ay', 0),
            (exact, 'a', 'ayy', 0),
            (exact, 'ax', 'ay', 0),
            (exact, 'a', 'y', 0),
            (exact, 'y', 'a', 0),
            (exact, 'aaax', 'aaa', 0),
            (exact, 'axxx', 'aaa', 0),
            (exact, 'aaxx', 'aayy', 0),
            (exact, 'xxaa', 'yyaa', 1/2),
            (exact, 'aaxxx', 'aay', 0),
            (exact, 'aaxxxx', 'aayyy', 0),
            (exact, 'xa', 'a', 1),
            (exact, 'xxa', 'a', 1),
            (exact, 'xa', 'ya', 1/2),
            (exact, 'a', 'ya', 1),
            (exact, 'a', 'yya', 1),
            (exact, 'xa', 'ya', 1/2),
            (exact, 'xaaa', 'aaa', 1),
            (close, 'xxxa', 'aaa', 1/3),
            (close, 'xxxaa', 'yaa', 2/3),
            (exact, 'xxxxaa', 'yyyaa', 2/5),
        )
        for check, src, tar, score in table:
            check(sim_suffix(src, tar), score)

    def test_dist_suffix(self):
        """Test abydos.distance.dist_suffix.

        Each row names the assertion to use (exact or approximate), the
        two input strings and the expected distance.
        """
        exact, close = self.assertEqual, self.assertAlmostEqual
        table = (
            (exact, '', '', 0),
            (exact, 'a', '', 1),
            (exact, '', 'a', 1),
            (exact, 'a', 'a', 0),
            (exact, 'ax', 'a', 1),
            (exact, 'axx', 'a', 1),
            (exact, 'ax', 'ay', 1),
            (exact, 'a', 'ay', 1),
            (exact, 'a', 'ayy', 1),
            (exact, 'ax', 'ay', 1),
            (exact, 'a', 'y', 1),
            (exact, 'y', 'a', 1),
            (exact, 'aaax', 'aaa', 1),
            (exact, 'axxx', 'aaa', 1),
            (exact, 'aaxx', 'aayy', 1),
            (exact, 'xxaa', 'yyaa', 1/2),
            (exact, 'aaxxx', 'aay', 1),
            (exact, 'aaxxxx', 'aayyy', 1),
            (exact, 'xa', 'a', 0),
            (exact, 'xxa', 'a', 0),
            (exact, 'xa', 'ya', 1/2),
            (exact, 'a', 'ya', 0),
            (exact, 'a', 'yya', 0),
            (exact, 'xa', 'ya', 1/2),
            (exact, 'xaaa', 'aaa', 0),
            (close, 'xxxa', 'aaa', 2/3),
            (close, 'xxxaa', 'yaa', 1/3),
            (exact, 'xxxxaa', 'yyyaa', 3/5),
        )
        for check, src, tar, score in table:
            check(dist_suffix(src, tar), score)
2181
2182
2183
class MLIPNSTestCases(unittest.TestCase):
    """Test the MLIPNS similarity & distance functions.

    abydos.distance.sim_mlipns & .dist_mlipns
    """

    def test_sim_mlipns(self):
        """Test abydos.distance.sim_mlipns over a table of string pairs."""
        expectations = (
            ('', '', 1),
            ('a', '', 0),
            ('', 'a', 0),
            ('a', 'a', 1),
            ('ab', 'a', 1),
            ('abc', 'abc', 1),
            ('abc', 'abcde', 1),
            ('abcg', 'abcdeg', 1),
            ('abcg', 'abcdefg', 0),
            ('Tomato', 'Tamato', 1),
            ('ato', 'Tam', 1),
        )
        for src, tar, score in expectations:
            self.assertEqual(sim_mlipns(src, tar), score)

    def test_dist_mlipns(self):
        """Test abydos.distance.dist_mlipns over a table of string pairs."""
        expectations = (
            ('', '', 0),
            ('a', '', 1),
            ('', 'a', 1),
            ('a', 'a', 0),
            ('ab', 'a', 0),
            ('abc', 'abc', 0),
            ('abc', 'abcde', 0),
            ('abcg', 'abcdeg', 0),
            ('abcg', 'abcdefg', 1),
            ('Tomato', 'Tamato', 0),
            ('ato', 'Tam', 0),
        )
        for src, tar, score in expectations:
            self.assertEqual(dist_mlipns(src, tar), score)
2216
2217
2218
class BagTestCases(unittest.TestCase):
    """Test the bag distance and its similarity/distance forms.

    abydos.distance.bag, .sim_bag & .dist_bag
    """

    def test_bag(self):
        """Test abydos.distance.bag over a table of string pairs."""
        expectations = (
            ('', '', 0),
            ('nelson', '', 6),
            ('', 'neilsen', 7),
            ('ab', 'a', 1),
            ('ab', 'c', 2),
            ('nelson', 'neilsen', 2),
            ('neilsen', 'nelson', 2),
            ('niall', 'neal', 2),
            ('aluminum', 'Catalan', 5),
            ('abcdefg', 'hijklm', 7),
            ('abcdefg', 'hijklmno', 8),
        )
        for src, tar, score in expectations:
            self.assertEqual(bag(src, tar), score)

    def test_sim_bag(self):
        """Test abydos.distance.sim_bag.

        Each row names the assertion to use (exact or approximate), the
        two input strings and the expected similarity.
        """
        exact, close = self.assertEqual, self.assertAlmostEqual
        table = (
            (exact, '', '', 1),
            (exact, 'nelson', '', 0),
            (exact, '', 'neilsen', 0),
            (exact, 'ab', 'a', 0.5),
            (exact, 'ab', 'c', 0),
            (close, 'nelson', 'neilsen', 5/7),
            (close, 'neilsen', 'nelson', 5/7),
            (close, 'niall', 'neal', 3/5),
            (close, 'aluminum', 'Catalan', 3/8),
            (exact, 'abcdefg', 'hijklm', 0),
            (exact, 'abcdefg', 'hijklmno', 0),
        )
        for check, src, tar, score in table:
            check(sim_bag(src, tar), score)

    def test_dist_bag(self):
        """Test abydos.distance.dist_bag.

        Each row names the assertion to use (exact or approximate), the
        two input strings and the expected distance.
        """
        exact, close = self.assertEqual, self.assertAlmostEqual
        table = (
            (exact, '', '', 0),
            (exact, 'nelson', '', 1),
            (exact, '', 'neilsen', 1),
            (exact, 'ab', 'a', 0.5),
            (exact, 'ab', 'c', 1),
            (close, 'nelson', 'neilsen', 2/7),
            (close, 'neilsen', 'nelson', 2/7),
            (close, 'niall', 'neal', 2/5),
            (close, 'aluminum', 'Catalan', 5/8),
            (exact, 'abcdefg', 'hijklm', 1),
            (exact, 'abcdefg', 'hijklmno', 1),
        )
        for check, src, tar, score in table:
            check(dist_bag(src, tar), score)
2265
2266
2267
class EditexTestCases(unittest.TestCase):
    """Test the Editex distance and its similarity/distance forms.

    abydos.distance.editex, .sim_editex & .dist_editex
    """

    def test_editex(self):
        """Test abydos.distance.editex with its default arguments."""
        expectations = (
            ('', '', 0),
            ('nelson', '', 12),
            ('', 'neilsen', 14),
            ('ab', 'a', 2),
            ('ab', 'c', 4),
            ('nelson', 'neilsen', 2),
            ('neilsen', 'nelson', 2),
            ('niall', 'neal', 1),
            ('neal', 'niall', 1),
            ('niall', 'nihal', 2),
            ('nihal', 'niall', 2),
            ('neal', 'nihl', 3),
            ('nihl', 'neal', 3),
        )
        for src, tar, cost in expectations:
            self.assertEqual(editex(src, tar), cost)

    def test_editex_local(self):
        """Test abydos.distance.editex (local variant, local=True)."""
        expectations = (
            ('', '', 0),
            ('nelson', '', 12),
            ('', 'neilsen', 14),
            ('ab', 'a', 2),
            ('ab', 'c', 2),
            ('nelson', 'neilsen', 2),
            ('neilsen', 'nelson', 2),
            ('niall', 'neal', 1),
            ('neal', 'niall', 1),
            ('niall', 'nihal', 2),
            ('nihal', 'niall', 2),
            ('neal', 'nihl', 3),
            ('nihl', 'neal', 3),
        )
        for src, tar, cost in expectations:
            self.assertEqual(editex(src, tar, local=True), cost)

    def test_sim_editex(self):
        """Test abydos.distance.sim_editex.

        Each row names the assertion to use (exact or approximate), the
        two input strings and the expected similarity.
        """
        exact, close = self.assertEqual, self.assertAlmostEqual
        table = (
            (exact, '', '', 1),
            (exact, 'nelson', '', 0),
            (exact, '', 'neilsen', 0),
            (exact, 'ab', 'a', 0.5),
            (exact, 'ab', 'c', 0),
            (close, 'nelson', 'neilsen', 12/14),
            (close, 'neilsen', 'nelson', 12/14),
            (exact, 'niall', 'neal', 0.9),
        )
        for check, src, tar, score in table:
            check(sim_editex(src, tar), score)

    def test_dist_editex(self):
        """Test abydos.distance.dist_editex.

        Each row names the assertion to use (exact or approximate), the
        two input strings and the expected distance.
        """
        exact, close = self.assertEqual, self.assertAlmostEqual
        table = (
            (exact, '', '', 0),
            (exact, 'nelson', '', 1),
            (exact, '', 'neilsen', 1),
            (exact, 'ab', 'a', 0.5),
            (exact, 'ab', 'c', 1),
            (close, 'nelson', 'neilsen', 2/14),
            (close, 'neilsen', 'nelson', 2/14),
            (exact, 'niall', 'neal', 0.1),
        )
        for check, src, tar, score in table:
            check(dist_editex(src, tar), score)
2326
2327
2328
class EudexTestCases(unittest.TestCase):
2329
    """Test Eudex distance functions.
2330
2331
    abydos.distance.eudex_hamming, dist_eudex, & sim_eudex
2332
    """
2333
2334
    def test_eudex_hamming(self):
2335
        """Test abydos.distance.eudex_hamming."""
2336
        # Base cases
2337
        self.assertEqual(eudex_hamming('', ''), 0)
2338
        self.assertEqual(eudex_hamming('', '', None), 0)
2339
        self.assertEqual(eudex_hamming('', '', 'fibonacci'), 0)
2340
        self.assertEqual(eudex_hamming('', '', [10, 1, 1, 1]), 0)
2341
        self.assertEqual(eudex_hamming('', '',
2342
                                       lambda: [(yield 1) for _
2343
                                                in range(10)]), 0)
2344
        self.assertEqual(eudex_hamming('', '', normalized=True), 0)
2345
2346
        self.assertEqual(eudex_hamming('Niall', 'Niall'), 0)
2347
        self.assertEqual(eudex_hamming('Niall', 'Niall', None), 0)
2348
        self.assertEqual(eudex_hamming('Niall', 'Niall', 'fibonacci'), 0)
2349
        self.assertEqual(eudex_hamming('Niall', 'Niall', [10, 1, 1, 1]), 0)
2350
        self.assertEqual(eudex_hamming('Niall', 'Niall',
2351
                                       lambda: [(yield 1) for _
2352
                                                in range(10)]), 0)
2353
        self.assertEqual(eudex_hamming('Niall', 'Niall', normalized=True), 0)
2354
2355
        self.assertEqual(eudex_hamming('Niall', 'Neil'), 2)
2356
        self.assertEqual(eudex_hamming('Niall', 'Neil', None), 1)
2357
        self.assertEqual(eudex_hamming('Niall', 'Neil', 'fibonacci'), 2)
2358
        self.assertEqual(eudex_hamming('Niall', 'Neil', [10, 1, 1, 1]), 1)
2359
        self.assertEqual(eudex_hamming('Niall', 'Neil',
2360
                                       lambda: [(yield 1) for _
2361
                                                in range(10)]), 1)
2362
        self.assertAlmostEqual(eudex_hamming('Niall', 'Neil', normalized=True),
2363
                               0.00098039)
2364
2365
        self.assertEqual(eudex_hamming('Niall', 'Colin'), 524)
2366
        self.assertEqual(eudex_hamming('Niall', 'Colin', None), 10)
2367
        self.assertEqual(eudex_hamming('Niall', 'Colin', 'fibonacci'), 146)
2368
        self.assertEqual(eudex_hamming('Niall', 'Colin', [10, 1, 1, 1]), 6)
2369
        self.assertEqual(eudex_hamming('Niall', 'Colin',
2370
                                       lambda: [(yield 1) for _
2371
                                                in range(10)]), 10)
2372
        self.assertAlmostEqual(eudex_hamming('Niall', 'Colin',
2373
                                             normalized=True), 0.25686274)
2374
2375
    def test_dist_eudex(self):
2376
        """Test abydos.distance.dist_eudex."""
2377
        # Base cases
2378
        self.assertEqual(dist_eudex('', ''), 0)
2379
        self.assertEqual(dist_eudex('', '', None), 0)
2380
        self.assertEqual(dist_eudex('', '', 'fibonacci'), 0)
2381
2382
        self.assertEqual(dist_eudex('Niall', 'Niall'), 0)
2383
        self.assertEqual(dist_eudex('Niall', 'Niall', None), 0)
2384
        self.assertEqual(dist_eudex('Niall', 'Niall', 'fibonacci'), 0)
2385
2386
        self.assertAlmostEqual(dist_eudex('Niall', 'Neil'), 0.00098039)
2387
        self.assertAlmostEqual(dist_eudex('Niall', 'Neil', None), 0.11111111)
2388
        self.assertAlmostEqual(dist_eudex('Niall', 'Neil', 'fibonacci'),
2389
                               0.00287356)
2390
2391
        self.assertAlmostEqual(dist_eudex('Niall', 'Colin'), 0.25686275)
2392
        self.assertAlmostEqual(dist_eudex('Niall', 'Colin', None), 0.16666667)
2393
        self.assertAlmostEqual(dist_eudex('Niall', 'Colin', 'fibonacci'),
2394
                               0.20977011)
2395
2396
    def test_sim_eudex(self):
2397
        """Test abydos.distance.sim_eudex."""
2398
        # Base cases
2399
        self.assertEqual(sim_eudex('', ''), 1)
2400
        self.assertEqual(sim_eudex('', '', None), 1)
2401
        self.assertEqual(sim_eudex('', '', 'fibonacci'), 1)
2402
2403
        self.assertEqual(sim_eudex('Niall', 'Niall'), 1)
2404
        self.assertEqual(sim_eudex('Niall', 'Niall', None), 1)
2405
        self.assertEqual(sim_eudex('Niall', 'Niall', 'fibonacci'), 1)
2406
2407
        self.assertAlmostEqual(sim_eudex('Niall', 'Neil'), 0.99901961)
2408
        self.assertAlmostEqual(sim_eudex('Niall', 'Neil', None), 0.88888889)
2409
        self.assertAlmostEqual(sim_eudex('Niall', 'Neil', 'fibonacci'),
2410
                               0.99712644)
2411
2412
        self.assertAlmostEqual(sim_eudex('Niall', 'Colin'), 0.74313725)
2413
        self.assertAlmostEqual(sim_eudex('Niall', 'Colin', None), 0.83333333)
2414
        self.assertAlmostEqual(sim_eudex('Niall', 'Colin', 'fibonacci'),
2415
                               0.79022989)
2416
2417
2418
class Sift4TestCases(unittest.TestCase):
    """Test Sift4 functions.

    abydos.distance.sift4_simplest, sift4_common, dist_sift4, & sim_sift4

    The dist_sift4 and sim_sift4 tests use the same input pairs; for each
    pair the expected distance and the expected similarity sum to 1.
    """

    def test_sift4_simplest(self):
        """Test abydos.distance.sift4_simplest."""
        # tests copied from Lukas Benedix's post at
        # https://siderite.blogspot.com/2014/11/super-fast-and-accurate-string-distance.html
        self.assertEqual(sift4_simplest('', ''), 0)
        self.assertEqual(sift4_simplest('a', ''), 1)
        self.assertEqual(sift4_simplest('', 'a'), 1)
        self.assertEqual(sift4_simplest('abc', ''), 3)
        self.assertEqual(sift4_simplest('', 'abc'), 3)

        self.assertEqual(sift4_simplest('a', 'a'), 0)
        self.assertEqual(sift4_simplest('abc', 'abc'), 0)

        self.assertEqual(sift4_simplest('a', 'ab'), 1)
        self.assertEqual(sift4_simplest('ac', 'abc'), 1)
        self.assertEqual(sift4_simplest('abcdefg', 'xabxcdxxefxgx'), 10)

        self.assertEqual(sift4_simplest('ab', 'b'), 1)
        self.assertEqual(sift4_simplest('ab', 'a'), 1)
        self.assertEqual(sift4_simplest('abc', 'ac'), 1)
        self.assertEqual(sift4_simplest('xabxcdxxefxgx', 'abcdefg'), 10)

        self.assertEqual(sift4_simplest('a', 'b'), 1)
        self.assertEqual(sift4_simplest('ab', 'ac'), 1)
        self.assertEqual(sift4_simplest('ac', 'bc'), 1)
        self.assertEqual(sift4_simplest('abc', 'axc'), 1)
        self.assertEqual(sift4_simplest('xabxcdxxefxgx', '1ab2cd34ef5g6'), 6)

        self.assertEqual(sift4_simplest('example', 'samples'), 2)
        self.assertEqual(sift4_simplest('sturgeon', 'urgently'), 4)
        self.assertEqual(sift4_simplest('levenshtein', 'frankenstein'), 10)
        self.assertEqual(sift4_simplest('distance', 'difference'), 7)

        # Tests copied from
        # https://github.com/tdebatty/java-string-similarity/blob/master/src/test/java/info/debatty/java/stringsimilarity/experimental/Sift4Test.java
        # These calls pass an explicit third positional argument (5 and 10).
        self.assertEqual(sift4_simplest('This is the first string',
                                        'And this is another string', 5), 13)
        self.assertEqual(sift4_simplest('Lorem ipsum dolor sit amet, ' +
                                        'consectetur adipiscing elit.',
                                        'Amet Lorm ispum dolor sit amet, ' +
                                        'consetetur adixxxpiscing elit.',
                                        10), 20)

    def test_sift4_common(self):
        """Test abydos.distance.sift4_common."""
        # tests copied from Lukas Benedix's post at
        # https://siderite.blogspot.com/2014/11/super-fast-and-accurate-string-distance.html
        self.assertEqual(sift4_common('', ''), 0)
        self.assertEqual(sift4_common('a', ''), 1)
        self.assertEqual(sift4_common('', 'a'), 1)
        self.assertEqual(sift4_common('abc', ''), 3)
        self.assertEqual(sift4_common('', 'abc'), 3)

        self.assertEqual(sift4_common('a', 'a'), 0)
        self.assertEqual(sift4_common('abc', 'abc'), 0)

        self.assertEqual(sift4_common('a', 'ab'), 1)
        self.assertEqual(sift4_common('ac', 'abc'), 1)
        self.assertEqual(sift4_common('abcdefg', 'xabxcdxxefxgx'), 7)

        self.assertEqual(sift4_common('ab', 'b'), 1)
        self.assertEqual(sift4_common('ab', 'a'), 1)
        self.assertEqual(sift4_common('abc', 'ac'), 1)
        self.assertEqual(sift4_common('xabxcdxxefxgx', 'abcdefg'), 7)

        self.assertEqual(sift4_common('a', 'b'), 1)
        self.assertEqual(sift4_common('ab', 'ac'), 1)
        self.assertEqual(sift4_common('ac', 'bc'), 1)
        self.assertEqual(sift4_common('abc', 'axc'), 1)
        self.assertEqual(sift4_common('xabxcdxxefxgx', '1ab2cd34ef5g6'), 6)

        self.assertEqual(sift4_common('example', 'samples'), 2)
        self.assertEqual(sift4_common('sturgeon', 'urgently'), 3)
        self.assertEqual(sift4_common('levenshtein', 'frankenstein'), 6)
        self.assertEqual(sift4_common('distance', 'difference'), 5)

        # Tests copied from
        # https://github.com/tdebatty/java-string-similarity/blob/master/src/test/java/info/debatty/java/stringsimilarity/experimental/Sift4Test.java
        self.assertEqual(sift4_common('This is the first string',
                                      'And this is another string', 5), 11)
        self.assertEqual(sift4_common('Lorem ipsum dolor sit amet, ' +
                                      'consectetur adipiscing elit.',
                                      'Amet Lorm ispum dolor sit amet, ' +
                                      'consetetur adixxxpiscing elit.',
                                      10), 12)

        # cases with max_distance
        # With (5, 5) as the extra arguments every pair is expected to
        # return 5, including pairs that score lower without them.
        self.assertEqual(sift4_common('example', 'samples', 5, 5), 5)
        self.assertEqual(sift4_common('sturgeon', 'urgently', 5, 5), 5)
        self.assertEqual(sift4_common('levenshtein', 'frankenstein', 5, 5), 5)
        self.assertEqual(sift4_common('distance', 'difference', 5, 5), 5)

    def test_dist_sift4(self):
        """Test abydos.distance.dist_sift4."""
        # tests copied from Lukas Benedix's post at
        # https://siderite.blogspot.com/2014/11/super-fast-and-accurate-string-distance.html
        self.assertEqual(dist_sift4('', ''), 0)
        self.assertEqual(dist_sift4('a', ''), 1)
        self.assertEqual(dist_sift4('', 'a'), 1)
        self.assertEqual(dist_sift4('abc', ''), 1)
        self.assertEqual(dist_sift4('', 'abc'), 1)

        self.assertEqual(dist_sift4('a', 'a'), 0)
        self.assertEqual(dist_sift4('abc', 'abc'), 0)

        self.assertEqual(dist_sift4('a', 'ab'), 0.5)
        # 1/3 is not exactly representable as a float, so compare
        # approximately (as test_sim_sift4 does for 2/3).
        self.assertAlmostEqual(dist_sift4('ac', 'abc'), 1/3)
        self.assertAlmostEqual(dist_sift4('abcdefg', 'xabxcdxxefxgx'),
                               0.538461538)

        self.assertEqual(dist_sift4('ab', 'b'), 0.5)
        self.assertEqual(dist_sift4('ab', 'a'), 0.5)
        self.assertAlmostEqual(dist_sift4('abc', 'ac'), 1/3)
        self.assertAlmostEqual(dist_sift4('xabxcdxxefxgx', 'abcdefg'),
                               0.538461538)

        self.assertEqual(dist_sift4('a', 'b'), 1)
        self.assertEqual(dist_sift4('ab', 'ac'), 0.5)
        self.assertEqual(dist_sift4('ac', 'bc'), 0.5)
        self.assertAlmostEqual(dist_sift4('abc', 'axc'), 1/3)
        self.assertAlmostEqual(dist_sift4('xabxcdxxefxgx', '1ab2cd34ef5g6'),
                               0.461538461)

        self.assertAlmostEqual(dist_sift4('example', 'samples'), 0.285714285)
        self.assertAlmostEqual(dist_sift4('sturgeon', 'urgently'), 0.375)
        self.assertAlmostEqual(dist_sift4('levenshtein', 'frankenstein'), 0.5)
        self.assertAlmostEqual(dist_sift4('distance', 'difference'), 0.5)

        # Tests copied from
        # https://github.com/tdebatty/java-string-similarity/blob/master/src/test/java/info/debatty/java/stringsimilarity/experimental/Sift4Test.java
        self.assertAlmostEqual(dist_sift4('This is the first string',
                                          'And this is another string',
                                          5), 0.423076923)
        self.assertAlmostEqual(dist_sift4('Lorem ipsum dolor sit amet, ' +
                                          'consectetur adipiscing elit.',
                                          'Amet Lorm ispum dolor sit amet, ' +
                                          'consetetur adixxxpiscing elit.',
                                          10), 0.193548387)

        # cases with max_distance
        self.assertAlmostEqual(dist_sift4('example', 'samples', 5, 5),
                               0.714285714)
        self.assertAlmostEqual(dist_sift4('sturgeon', 'urgently', 5, 5), 0.625)
        self.assertAlmostEqual(dist_sift4('levenshtein', 'frankenstein', 5, 5),
                               0.416666666)
        self.assertAlmostEqual(dist_sift4('distance', 'difference', 5, 5), 0.5)

    def test_sim_sift4(self):
        """Test abydos.distance.sim_sift4."""
        # tests copied from Lukas Benedix's post at
        # https://siderite.blogspot.com/2014/11/super-fast-and-accurate-string-distance.html
        self.assertEqual(sim_sift4('', ''), 1)
        self.assertEqual(sim_sift4('a', ''), 0)
        self.assertEqual(sim_sift4('', 'a'), 0)
        self.assertEqual(sim_sift4('abc', ''), 0)
        self.assertEqual(sim_sift4('', 'abc'), 0)

        self.assertEqual(sim_sift4('a', 'a'), 1)
        self.assertEqual(sim_sift4('abc', 'abc'), 1)

        self.assertEqual(sim_sift4('a', 'ab'), 0.5)
        self.assertAlmostEqual(sim_sift4('ac', 'abc'), 2/3)
        self.assertAlmostEqual(sim_sift4('abcdefg', 'xabxcdxxefxgx'),
                               0.461538461)

        self.assertEqual(sim_sift4('ab', 'b'), 0.5)
        self.assertEqual(sim_sift4('ab', 'a'), 0.5)
        self.assertAlmostEqual(sim_sift4('abc', 'ac'), 2/3)
        self.assertAlmostEqual(sim_sift4('xabxcdxxefxgx', 'abcdefg'),
                               0.461538461)

        self.assertEqual(sim_sift4('a', 'b'), 0)
        self.assertEqual(sim_sift4('ab', 'ac'), 0.5)
        self.assertEqual(sim_sift4('ac', 'bc'), 0.5)
        self.assertAlmostEqual(sim_sift4('abc', 'axc'), 2/3)
        self.assertAlmostEqual(sim_sift4('xabxcdxxefxgx', '1ab2cd34ef5g6'),
                               0.538461538)

        self.assertAlmostEqual(sim_sift4('example', 'samples'), 0.714285714)
        self.assertAlmostEqual(sim_sift4('sturgeon', 'urgently'), 0.625)
        self.assertAlmostEqual(sim_sift4('levenshtein', 'frankenstein'), 0.5)
        self.assertAlmostEqual(sim_sift4('distance', 'difference'), 0.5)

        # Tests copied from
        # https://github.com/tdebatty/java-string-similarity/blob/master/src/test/java/info/debatty/java/stringsimilarity/experimental/Sift4Test.java
        self.assertAlmostEqual(sim_sift4('This is the first string',
                                         'And this is another string',
                                         5), 0.576923077)
        self.assertAlmostEqual(sim_sift4('Lorem ipsum dolor sit amet, ' +
                                         'consectetur adipiscing elit.',
                                         'Amet Lorm ispum dolor sit amet, ' +
                                         'consetetur adixxxpiscing elit.',
                                         10), 0.806451613)

        # cases with max_distance
        self.assertAlmostEqual(sim_sift4('example', 'samples', 5, 5),
                               0.285714286)
        self.assertAlmostEqual(sim_sift4('sturgeon', 'urgently', 5, 5), 0.375)
        self.assertAlmostEqual(sim_sift4('levenshtein', 'frankenstein', 5, 5),
                               0.583333333)
        self.assertAlmostEqual(sim_sift4('distance', 'difference', 5, 5), 0.5)
2625
2626
2627
class BaystatTestCases(unittest.TestCase):
    """Test Baystat functions.

    abydos.distance.sim_baystat & .dist_baystat

    Both tests use the same name pairs; for each pair the expected
    similarity and the expected distance sum to 1.
    """

    def test_sim_baystat(self):
        """Test abydos.distance.sim_baystat."""
        # Base cases
        self.assertEqual(sim_baystat('', ''), 1)
        self.assertEqual(sim_baystat('Colin', ''), 0)
        self.assertEqual(sim_baystat('Colin', 'Colin'), 1)

        # Examples given in the paper
        # https://www.statistik.bayern.de/medien/statistik/zensus/zusammenf__hrung_von_datenbest__nden_ohne_numerische_identifikatoren.pdf
        self.assertAlmostEqual(sim_baystat('DRAKOMENA', 'DRAOMINA'), 7/9)
        self.assertAlmostEqual(sim_baystat('RIEKI', 'RILKI'), 4/5)
        self.assertAlmostEqual(sim_baystat('ATANASSIONI', 'ATANASIOU'), 8/11)
        self.assertAlmostEqual(sim_baystat('LIESKOVSKY', 'LIESZKOVSZKY'),
                               10/12)
        self.assertAlmostEqual(sim_baystat('JEANETTE', 'JEANNETTE'), 8/9)
        self.assertAlmostEqual(sim_baystat('JOHANNES', 'JOHAN'), 0.625)
        self.assertAlmostEqual(sim_baystat('JOHANNES', 'HANS'), 0.375)
        self.assertAlmostEqual(sim_baystat('JOHANNES', 'HANNES'), 0.75)
        self.assertAlmostEqual(sim_baystat('ZIMMERMANN', 'SEMMERMANN'), 0.8)
        self.assertAlmostEqual(sim_baystat('ZIMMERMANN', 'ZIMMERER'), 0.6)
        self.assertAlmostEqual(sim_baystat('ZIMMERMANN', 'ZIMMER'), 0.6)

        # Tests to maximize coverage
        # Explicit values for the three optional positional arguments, and
        # a pair given with the shorter string first.
        self.assertAlmostEqual(sim_baystat('ZIMMERMANN', 'SEMMERMANN',
                                           2, 2, 2), 0.8)
        self.assertAlmostEqual(sim_baystat('ZIMMER', 'ZIMMERMANN'), 0.6)

    def test_dist_baystat(self):
        """Test abydos.distance.dist_baystat."""
        # Base cases
        self.assertEqual(dist_baystat('', ''), 0)
        self.assertEqual(dist_baystat('Colin', ''), 1)
        self.assertEqual(dist_baystat('Colin', 'Colin'), 0)

        # Examples given in the paper
        # https://www.statistik.bayern.de/medien/statistik/zensus/zusammenf__hrung_von_datenbest__nden_ohne_numerische_identifikatoren.pdf
        self.assertAlmostEqual(dist_baystat('DRAKOMENA', 'DRAOMINA'), 2/9)
        self.assertAlmostEqual(dist_baystat('RIEKI', 'RILKI'), 1/5)
        self.assertAlmostEqual(dist_baystat('ATANASSIONI', 'ATANASIOU'), 3/11)
        self.assertAlmostEqual(dist_baystat('LIESKOVSKY', 'LIESZKOVSZKY'),
                               2/12)
        self.assertAlmostEqual(dist_baystat('JEANETTE', 'JEANNETTE'), 1/9)
        self.assertAlmostEqual(dist_baystat('JOHANNES', 'JOHAN'), 0.375)
        self.assertAlmostEqual(dist_baystat('JOHANNES', 'HANS'), 0.625)
        self.assertAlmostEqual(dist_baystat('JOHANNES', 'HANNES'), 0.25)
        self.assertAlmostEqual(dist_baystat('ZIMMERMANN', 'SEMMERMANN'), 0.2)
        self.assertAlmostEqual(dist_baystat('ZIMMERMANN', 'ZIMMERER'), 0.4)
        self.assertAlmostEqual(dist_baystat('ZIMMERMANN', 'ZIMMER'), 0.4)
2681
2682
2683
class TypoTestCases(unittest.TestCase):
    """Test Typo functions.

    abydos.distance.typo, sim_typo & .dist_typo

    The sim_typo and dist_typo expectations for the four-character inputs
    are written in terms of the corresponding typo() values divided by 4.
    """

    def test_typo(self):
        """Test abydos.distance.typo."""
        # Base cases
        self.assertEqual(typo('', ''), 0)
        self.assertEqual(typo('', 'typo'), 4)
        self.assertEqual(typo('typo', ''), 4)

        self.assertEqual(typo('asdf', 'zxcv'), 2)
        self.assertEqual(typo('asdf', 'ASDF'), 1)
        self.assertEqual(typo('asdf', 'qsdf'), 0.5)

        # Same input pair under each explicitly named metric.
        self.assertAlmostEqual(typo('asdf', 'asdt', metric='euclidean'),
                               0.70710677)
        self.assertAlmostEqual(typo('asdf', 'asdt', metric='manhattan'),
                               1)
        self.assertAlmostEqual(typo('asdf', 'asdt', metric='log-euclidean'),
                               0.4406868)
        self.assertAlmostEqual(typo('asdf', 'asdt', metric='log-manhattan'),
                               0.54930615)

        # 'Ö' in the target is expected to be rejected with ValueError.
        self.assertRaises(ValueError, typo, 'asdf', 'Ösdf')

    def test_sim_typo(self):
        """Test abydos.distance.sim_typo."""
        # Base cases
        self.assertEqual(sim_typo('', ''), 1)
        self.assertEqual(sim_typo('', 'typo'), 0)
        self.assertEqual(sim_typo('typo', ''), 0)

        self.assertEqual(sim_typo('asdf', 'zxcv'), 0.5)
        self.assertEqual(sim_typo('asdf', 'ASDF'), 0.75)
        self.assertEqual(sim_typo('asdf', 'qsdf'), 0.875)

        self.assertAlmostEqual(sim_typo('asdf', 'asdt', metric='euclidean'),
                               1-(0.70710677/4))
        self.assertAlmostEqual(sim_typo('asdf', 'asdt', metric='manhattan'),
                               0.75)
        self.assertAlmostEqual(sim_typo('asdf', 'asdt',
                                        metric='log-euclidean'),
                               1-(0.4406868/4))
        self.assertAlmostEqual(sim_typo('asdf', 'asdt',
                                        metric='log-manhattan'),
                               1-(0.54930615/4))

    def test_dist_typo(self):
        """Test abydos.distance.dist_typo."""
        # Base cases
        self.assertEqual(dist_typo('', ''), 0)
        self.assertEqual(dist_typo('', 'typo'), 1)
        self.assertEqual(dist_typo('typo', ''), 1)

        self.assertEqual(dist_typo('asdf', 'zxcv'), 0.5)
        self.assertEqual(dist_typo('asdf', 'ASDF'), 0.25)
        self.assertEqual(dist_typo('asdf', 'qsdf'), 0.125)

        self.assertAlmostEqual(dist_typo('asdf', 'asdt', metric='euclidean'),
                               0.70710677/4)
        self.assertAlmostEqual(dist_typo('asdf', 'asdt', metric='manhattan'),
                               0.25)
        self.assertAlmostEqual(dist_typo('asdf', 'asdt',
                                         metric='log-euclidean'), 0.4406868/4)
        self.assertAlmostEqual(dist_typo('asdf', 'asdt',
                                         metric='log-manhattan'), 0.54930615/4)
2752
2753
2754
class IndelTestCases(unittest.TestCase):
    """Test indel functions.

    abydos.distance.sim_indel & .dist_indel

    Both tests use the same string pairs, each checked in both argument
    orders; the expected similarity and distance for a pair sum to 1.
    """

    def test_sim_indel(self):
        """Test abydos.distance.sim_indel."""
        # Base cases
        self.assertEqual(sim_indel('', ''), 1)
        self.assertEqual(sim_indel('a', ''), 0)
        self.assertEqual(sim_indel('', 'a'), 0)
        self.assertEqual(sim_indel('abc', ''), 0)
        self.assertEqual(sim_indel('', 'abc'), 0)
        self.assertEqual(sim_indel('abcd', 'efgh'), 0)

        # Same expectation with the arguments swapped.
        self.assertAlmostEqual(sim_indel('Nigel', 'Niall'), 0.6)
        self.assertAlmostEqual(sim_indel('Niall', 'Nigel'), 0.6)
        self.assertAlmostEqual(sim_indel('Colin', 'Coiln'), 0.8)
        self.assertAlmostEqual(sim_indel('Coiln', 'Colin'), 0.8)

    def test_dist_indel(self):
        """Test abydos.distance.dist_indel."""
        # Base cases
        self.assertEqual(dist_indel('', ''), 0)
        self.assertEqual(dist_indel('a', ''), 1)
        self.assertEqual(dist_indel('', 'a'), 1)
        self.assertEqual(dist_indel('abc', ''), 1)
        self.assertEqual(dist_indel('', 'abc'), 1)
        self.assertEqual(dist_indel('abcd', 'efgh'), 1)

        # Same expectation with the arguments swapped.
        self.assertAlmostEqual(dist_indel('Nigel', 'Niall'), 0.4)
        self.assertAlmostEqual(dist_indel('Niall', 'Nigel'), 0.4)
        self.assertAlmostEqual(dist_indel('Colin', 'Coiln'), 0.2)
        self.assertAlmostEqual(dist_indel('Coiln', 'Colin'), 0.2)
2789
2790
2791
class SynonameTestCases(unittest.TestCase):
    """Test Synoname functions.

    abydos.distance._synoname_strip_punct, _synoname_word_approximation, &
    synoname
    """

    def test_synoname_strip_punct(self):
        """Test abydos.distance._synoname_strip_punct."""
        # Base cases
        self.assertEqual(_synoname_strip_punct(''), '')
        self.assertEqual(_synoname_strip_punct('abcdefg'), 'abcdefg')
        self.assertEqual(_synoname_strip_punct('a\'b-c,d!e:f%g'), 'abcdefg')

    def test_synoname_word_approximation(self):
        """Test abydos.distance._synoname_word_approximation.

        Most calls pass four name strings plus a features dict with some of
        the keys 'gen_conflict', 'roman_conflict', 'src_specials' and
        'tar_specials'; the first two calls omit the trailing arguments.
        """
        # Base cases
        self.assertEqual(_synoname_word_approximation('', ''), 0)

        self.assertEqual(
            _synoname_word_approximation('di Domenico di Bonaventura',
                                         'di Tomme di Nuto',
                                         'Cosimo', 'Luca'), 0.4)
        self.assertEqual(
            _synoname_word_approximation('Antonello da Messina',
                                         'Messina', '', 'Antonello da',
                                         {'gen_conflict': False,
                                          'roman_conflict': False,
                                          'src_specials':
                                              [(35, 'b'), (35, 'c')],
                                          'tar_specials':
                                              [(35, 'b'), (35, 'c')]}), 0)
        self.assertEqual(
            _synoname_word_approximation('louis ii', 'louis ii',
                                         'sr jean', 'sr  pierre',
                                         {'gen_conflict': False,
                                          'roman_conflict': False,
                                          'src_specials':
                                              [(49, 'b'), (68, 'd'),
                                               (121, 'b')],
                                          'tar_specials':
                                              [(49, 'b'), (68, 'd'),
                                               (121, 'b')]}), 0)
        self.assertEqual(
            _synoname_word_approximation('louis ii', 'louis ii',
                                         'il giovane', 'sr cadet',
                                         {'gen_conflict': False,
                                          'roman_conflict': False,
                                          'src_specials':
                                              [(46, 'a'), (49, 'b'),
                                               (52, 'a'), (68, 'd')],
                                          'tar_specials':
                                              [(8, 'a'), (49, 'b'),
                                               (68, 'd'), (121, 'a')]}), 1)
        self.assertAlmostEqual(
            _synoname_word_approximation('louis ii', 'louis ii',
                                         'ste.-geo ste.', 'ste.-jo ste.',
                                         {'gen_conflict': False,
                                          'roman_conflict': False,
                                          'src_specials':
                                              [(49, 'b'), (68, 'd'),
                                               (127, 'b'), (127, 'X')],
                                          'tar_specials':
                                              [(49, 'b'), (68, 'd'),
                                               (127, 'b'), (127, 'X')]}), 2/3)
        self.assertAlmostEqual(
            _synoname_word_approximation('louis ii', 'louis',
                                         'ste.-geo ste.', '',
                                         {'gen_conflict': False,
                                          'roman_conflict': False,
                                          'src_specials':
                                              [(49, 'b'), (68, 'd'),
                                               (127, 'b'), (127, 'X')],
                                          'tar_specials': []}), 0)
        # An empty features dict is also accepted.
        self.assertAlmostEqual(
            _synoname_word_approximation('lou ii', 'louis', 'louis iv', 'ste.',
                                         {}), 0)
        self.assertEqual(
            _synoname_word_approximation('ren', 'loren ste.', '', '',
                                         {'tar_specials': [(68, 'd'),
                                                           (127, 'X')],
                                          'src_specials': [(0, '')]}), 1)

    def test_synoname(self):
        """Test abydos.distance.synoname.

        Covers the accepted input formats (tuples and '#'-delimited
        strings), the numeric result, and the named result returned when
        ret_name=True.
        """
        # Base cases
        self.assertEqual(synoname('', ''), 1)
        self.assertEqual(synoname('', '', tests=['exact']), 1)
        # With no usable test selected the expected result is 13.
        self.assertEqual(synoname('', '', tests=[]), 13)
        self.assertEqual(synoname('', '', tests=['nonsense-test']), 13)
        self.assertEqual(synoname('', '', ret_name=True), 'exact')

        # Test input formats
        self.assertEqual(synoname(('Brueghel II (the Younger)', 'Pieter',
                                   'Workshop of'),
                                  ('Brueghel II (the Younger)', 'Pieter',
                                   'Workshop of')), 1)
        self.assertEqual(synoname('Brueghel II (the Younger)#Pieter#' +
                                  'Workshop of',
                                  'Brueghel II (the Younger)#Pieter#' +
                                  'Workshop of'), 1)
        self.assertEqual(synoname('22#Brueghel II (the Younger)#Pieter#' +
                                  'Workshop of',
                                  '44#Brueghel II (the Younger)#Pieter#' +
                                  'Workshop of'), 1)

        # approx_c tests
        self.assertEqual(synoname(('Master of Brueghel II (the Younger)',
                                   'Pieter', 'Workshop of'),
                                  ('Brueghel I (the Elder)', 'Pieter',
                                   'Workshop of')), 13)
        self.assertEqual(synoname(('Master of Brueghel II',
                                   'Pieter', 'Workshop of'),
                                  ('Master known as the Brueghel II', 'Pieter',
                                   'Workshop of')), 10)

        # Types 1-12
        self.assertEqual(synoname(('Brueghel', 'Pieter', ''),
                                  ('Brueghel', 'Pieter', ''),
                                  ret_name=True), 'exact')

        self.assertEqual(synoname(('Brueghel II', 'Pieter', ''),
                                  ('Brueghel I', 'Pieter', ''),
                                  ret_name=True), 'no_match')
        self.assertEqual(synoname(('Breghel', 'Pieter', ''),
                                  ('Brueghel', 'Pieter', ''),
                                  ret_name=True), 'omission')
        self.assertEqual(synoname(('Brueghel', 'Pieter', ''),
                                  ('Breghel', 'Pieter', ''),
                                  ret_name=True), 'omission')
        self.assertEqual(synoname(('Brueghel', 'Piter', ''),
                                  ('Brueghel', 'Pieter', ''),
                                  ret_name=True), 'omission')
        self.assertEqual(synoname(('Brueghel', 'Pieter', ''),
                                  ('Brueghel', 'Piter', ''),
                                  ret_name=True), 'omission')
        self.assertEqual(synoname(('Brughel', 'Pieter', ''),
                                  ('Breghel', 'Pieter', ''),
                                  ret_name=True), 'substitution')
        self.assertEqual(synoname(('Breughel', 'Peter', ''),
                                  ('Breughel', 'Piter', ''),
                                  ret_name=True), 'substitution')
        self.assertEqual(synoname(('Brueghel', 'Pieter', ''),
                                  ('Breughel', 'Pieter', ''),
                                  ret_name=True), 'transposition')
        self.assertEqual(synoname(('Brueghel', 'Peiter', ''),
                                  ('Brueghel', 'Pieter', ''),
                                  ret_name=True), 'transposition')

        self.assertEqual(synoname(('Brueghel:', 'Pieter', ''),
                                  ('Brueghel', 'Pi-eter', ''),
                                  ret_name=True), 'punctuation')
        self.assertEqual(synoname(('Brueghel,', 'Pieter', ''),
                                  ('Brueghel', 'Pieter...', ''),
                                  ret_name=True), 'punctuation')
        self.assertEqual(synoname(('Seu rat', 'George Pierre', ''),
                                  ('Seu-rat', 'George-Pierre', ''),
                                  ret_name=True), 'punctuation')
        self.assertEqual(synoname(('Picasso', '', ''),
                                  ('Picasso', 'Pablo', ''),
                                  ret_name=True), 'no_first')
        self.assertEqual(synoname(('Pereira', 'I. R.', ''),
                                  ('Pereira', 'Irene Rice', ''),
                                  ret_name=True), 'initials')
        self.assertEqual(synoname(('Pereira', 'I.', ''),
                                  ('Pereira', 'Irene Rice', ''),
                                  ret_name=True), 'initials')
        # Negative cases: these pairs must not be classified as 'initials'.
        self.assertNotEqual(synoname(('Pereira', 'I. R.', ''),
                                     ('Pereira', 'I. Smith', ''),
                                     ret_name=True), 'initials')
        self.assertNotEqual(synoname(('Pereira', 'I. R. S.', ''),
                                     ('Pereira', 'I. S. R.', ''),
                                     ret_name=True), 'initials')
        self.assertEqual(synoname(('de Goya', 'Francisco', ''),
                                  ('de Goya y Lucientes', 'Francisco', ''),
                                  ret_name=True), 'extension')
        self.assertEqual(synoname(('Seurat', 'George', ''),
                                  ('Seurat', 'George-Pierre', ''),
                                  ret_name=True), 'extension')
        self.assertEqual(synoname(('Gericault', 'Theodore', ''),
                                  ('Gericault', 'Jean Louis Andre Theodore',
                                   ''),
                                  ret_name=True), 'inclusion')
        self.assertEqual(synoname(('Dore', 'Gustave', ''),
                                  ('Dore', 'Paul Gustave Louis Christophe',
                                   ''),
                                  ret_name=True), 'inclusion')

        self.assertEqual(synoname(('Rosetti', 'Dante Gabriel', ''),
                                  ('Rosetti', 'Gabriel Charles Dante', ''),
                                  ret_name=True), 'word_approx')
        self.assertEqual(synoname(('di Domenico di Bonaventura', 'Cosimo', ''),
                                  ('di Tomme di Nuto', 'Luca', ''),
                                  ret_name=True), 'no_match')
        self.assertEqual(synoname(('Pereira', 'I. R.', ''),
                                  ('Pereira', 'I. Smith', ''),
                                  ret_name=True), 'word_approx')
        self.assertEqual(synoname(('Antonello da Messina', '', ''),
                                  ('Messina', 'Antonello da', ''),
                                  ret_name=True), 'confusions')
        self.assertEqual(synoname(('Brueghel', 'Pietter', ''),
                                  ('Bruegghel', 'Pieter', ''),
                                  ret_name=True), 'char_approx')
2994
2995
2996
class SimDistTestCases(unittest.TestCase):
    """Test generic sim & dist functions.

    abydos.distance.sim & .dist
    """

    def test_sim(self):
        """Test abydos.distance.sim."""
        # With only two arguments, sim is expected to agree with
        # sim_levenshtein.
        self.assertEqual(sim('Niall', 'Nigel'),
                         sim_levenshtein('Niall', 'Nigel'))
        # A third positional argument of 0 is expected to raise
        # AttributeError.
        self.assertRaises(AttributeError, sim, 'abc', 'abc', 0)

    def test_dist(self):
        """Test abydos.distance.dist."""
        # With only two arguments, dist is expected to agree with
        # dist_levenshtein.
        self.assertEqual(dist('Niall', 'Nigel'),
                         dist_levenshtein('Niall', 'Nigel'))
        # A third positional argument of 0 is expected to raise
        # AttributeError.
        self.assertRaises(AttributeError, dist, 'abc', 'abc', 0)
3013
3014
3015
if __name__ == '__main__':
    # Run every TestCase in this module when the file is executed directly.
    unittest.main()
3017