Test Failed
Push — master ( d1b33f...9f504a )
by Chris
15:58
created

ChebyshevTestCases.test_dist_chebyshev()   A

Complexity

Conditions 1

Size

Total Lines 25
Code Lines 19

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 19
nop 1
dl 0
loc 25
rs 9.45
c 0
b 0
f 0
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2014-2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19
"""abydos.tests.test_distance.
20
21
This module contains unit tests for abydos.distance
22
"""
23
24
from __future__ import division, unicode_literals
25
26
import math
27
import os
28
import pkgutil
29
import sys
30
import unittest
31
from difflib import SequenceMatcher
32
33
from abydos.compression import ac_train
34
from abydos.distance import _synoname_strip_punct, bag, chebyshev, \
35
    damerau_levenshtein, dist, dist_bag, dist_baystat, dist_chebyshev, \
36
    dist_compression, dist_cosine, dist_damerau, dist_dice, dist_editex, \
37
    dist_euclidean, dist_hamming, dist_ident, dist_indel, dist_jaccard, \
38
    dist_jaro_winkler, dist_lcsseq,  dist_lcsstr, dist_length, \
39
    dist_levenshtein, dist_manhattan, dist_minkowski, dist_mlipns, \
40
    dist_monge_elkan, dist_mra, dist_overlap, dist_prefix, \
41
    dist_ratcliff_obershelp, dist_sift4, dist_strcmp95, dist_suffix, \
42
    dist_tversky, dist_typo, dist_eudex, editex, euclidean, eudex_hamming, \
43
    gotoh, hamming, lcsseq, lcsstr, levenshtein, manhattan, minkowski, \
44
    mra_compare, needleman_wunsch, sift4_simplest, sift4_common, sim, \
45
    sim_bag, sim_baystat, sim_chebyshev, sim_compression, sim_cosine, \
46
    sim_damerau, sim_dice, sim_editex, sim_euclidean, sim_hamming, sim_ident, \
47
    sim_indel, sim_jaccard, sim_jaro_winkler, sim_lcsseq, sim_lcsstr, \
48
    sim_length, sim_levenshtein, sim_manhattan, sim_matrix, sim_minkowski, \
49
    sim_mlipns, sim_monge_elkan, sim_mra, sim_overlap, sim_prefix, \
50
    sim_ratcliff_obershelp, sim_sift4, sim_strcmp95, sim_suffix, \
51
    sim_tanimoto, sim_tversky, sim_typo, sim_eudex, smith_waterman, \
52
    synoname, synoname_word_approximation, tanimoto, typo
53
from abydos.qgram import QGrams
54
55
from six.moves import range
56
57
# Directory containing this test module.
TESTDIR = os.path.dirname(__file__)

# Spelling variants and related forms of the name "Niall", used as shared
# test data.
NIALL = ('Niall', 'Neal', 'Neil', 'Njall', 'Njáll', 'Nigel', 'Neel', 'Nele',
         'Nigelli', 'Nel', 'Kneale', 'Uí Néill', 'O\'Neill', 'MacNeil',
         'MacNele', 'Niall Noígíallach')

# Spelling variants and related forms of the name "Colin", used as shared
# test data.
COLIN = ('Colin', 'Collin', 'Cullen', 'Cuilen', 'Cailean', 'MacCailean',
         'Cuilén', 'Colle', 'Calum', 'Callum', 'Colinn', 'Colon', 'Colynn',
         'Col', 'Cole', 'Nicolas', 'Nicholas', 'Cailean Mór Caimbeul')
66
67
68
class LevenshteinTestCases(unittest.TestCase):
    """Test Levenshtein functions.

    abydos.distance.levenshtein, .dist_levenshtein,
    .sim_levenshtein, .damerau_levenshtein, .dist_damerau, & .sim_damerau
    """

    def test_levenshtein(self):
        """Test abydos.distance.levenshtein."""
        self.assertEqual(levenshtein('', ''), 0)

        # http://oldfashionedsoftware.com/tag/levenshtein-distance/
        self.assertEqual(levenshtein('a', ''), 1)
        self.assertEqual(levenshtein('', 'a'), 1)
        self.assertEqual(levenshtein('abc', ''), 3)
        self.assertEqual(levenshtein('', 'abc'), 3)
        self.assertEqual(levenshtein('', ''), 0)
        self.assertEqual(levenshtein('a', 'a'), 0)
        self.assertEqual(levenshtein('abc', 'abc'), 0)
        self.assertEqual(levenshtein('', 'a'), 1)
        self.assertEqual(levenshtein('a', 'ab'), 1)
        self.assertEqual(levenshtein('b', 'ab'), 1)
        self.assertEqual(levenshtein('ac', 'abc'), 1)
        self.assertEqual(levenshtein('abcdefg', 'xabxcdxxefxgx'), 6)
        self.assertEqual(levenshtein('a', ''), 1)
        self.assertEqual(levenshtein('ab', 'a'), 1)
        self.assertEqual(levenshtein('ab', 'b'), 1)
        self.assertEqual(levenshtein('abc', 'ac'), 1)
        self.assertEqual(levenshtein('xabxcdxxefxgx', 'abcdefg'), 6)
        self.assertEqual(levenshtein('a', 'b'), 1)
        self.assertEqual(levenshtein('ab', 'ac'), 1)
        self.assertEqual(levenshtein('ac', 'bc'), 1)
        self.assertEqual(levenshtein('abc', 'axc'), 1)
        self.assertEqual(levenshtein('xabxcdxxefxgx', '1ab2cd34ef5g6'), 6)
        self.assertEqual(levenshtein('example', 'samples'), 3)
        self.assertEqual(levenshtein('sturgeon', 'urgently'), 6)
        self.assertEqual(levenshtein('levenshtein', 'frankenstein'), 6)
        self.assertEqual(levenshtein('distance', 'difference'), 5)
        self.assertEqual(levenshtein('java was neat', 'scala is great'), 7)

        # https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance
        self.assertEqual(levenshtein('CA', 'ABC', 'dam'), 2)
        self.assertEqual(levenshtein('CA', 'ABC', 'osa'), 3)

        # The cost tuples below are exercised position by position:
        # (insert, delete, substitute, transpose).

        # test cost of insert
        self.assertEqual(levenshtein('', 'b', 'lev', cost=(5, 7, 10, 10)), 5)
        self.assertEqual(levenshtein('', 'b', 'osa', cost=(5, 7, 10, 10)), 5)
        self.assertEqual(levenshtein('', 'b', 'dam', cost=(5, 7, 10, 10)), 5)
        self.assertEqual(levenshtein('a', 'ab', 'lev', cost=(5, 7, 10, 10)), 5)
        self.assertEqual(levenshtein('a', 'ab', 'osa', cost=(5, 7, 10, 10)), 5)
        self.assertEqual(levenshtein('a', 'ab', 'dam', cost=(5, 7, 10, 10)), 5)

        # test cost of delete
        self.assertEqual(levenshtein('b', '', 'lev', cost=(5, 7, 10, 10)), 7)
        self.assertEqual(levenshtein('b', '', 'osa', cost=(5, 7, 10, 10)), 7)
        self.assertEqual(levenshtein('b', '', 'dam', cost=(5, 7, 10, 10)), 7)
        self.assertEqual(levenshtein('ab', 'a', 'lev', cost=(5, 7, 10, 10)), 7)
        self.assertEqual(levenshtein('ab', 'a', 'osa', cost=(5, 7, 10, 10)), 7)
        self.assertEqual(levenshtein('ab', 'a', 'dam', cost=(5, 7, 10, 10)), 7)

        # test cost of substitute
        self.assertEqual(levenshtein('a', 'b', 'lev', cost=(10, 10, 5, 10)), 5)
        self.assertEqual(levenshtein('a', 'b', 'osa', cost=(10, 10, 5, 10)), 5)
        self.assertEqual(levenshtein('a', 'b', 'dam', cost=(10, 10, 5, 10)), 5)
        self.assertEqual(levenshtein('ac', 'bc', 'lev',
                                     cost=(10, 10, 5, 10)), 5)
        self.assertEqual(levenshtein('ac', 'bc', 'osa',
                                     cost=(10, 10, 5, 10)), 5)
        self.assertEqual(levenshtein('ac', 'bc', 'dam',
                                     cost=(10, 10, 5, 10)), 5)

        # test cost of transpose
        # In 'lev' mode the swap is expected to cost 20, whereas 'osa' and
        # 'dam' are expected to charge the transpose cost of 5.
        self.assertEqual(levenshtein('ab', 'ba', 'lev',
                                     cost=(10, 10, 10, 5)), 20)
        self.assertEqual(levenshtein('ab', 'ba', 'osa',
                                     cost=(10, 10, 10, 5)), 5)
        self.assertEqual(levenshtein('ab', 'ba', 'dam',
                                     cost=(5, 5, 10, 5)), 5)
        self.assertEqual(levenshtein('abc', 'bac', 'lev',
                                     cost=(10, 10, 10, 5)), 20)
        self.assertEqual(levenshtein('abc', 'bac', 'osa',
                                     cost=(10, 10, 10, 5)), 5)
        self.assertEqual(levenshtein('abc', 'bac', 'dam',
                                     cost=(5, 5, 10, 5)), 5)
        self.assertEqual(levenshtein('cab', 'cba', 'lev',
                                     cost=(10, 10, 10, 5)), 20)
        self.assertEqual(levenshtein('cab', 'cba', 'osa',
                                     cost=(10, 10, 10, 5)), 5)
        self.assertEqual(levenshtein('cab', 'cba', 'dam',
                                     cost=(5, 5, 10, 5)), 5)

        # test exception
        # 'dam' mode is expected to reject this particular cost tuple.
        self.assertRaises(ValueError, levenshtein, 'ab', 'ba', 'dam',
                          cost=(10, 10, 10, 5))

    def test_dist_levenshtein(self):
        """Test abydos.distance.dist_levenshtein."""
        self.assertEqual(dist_levenshtein('', ''), 0)

        self.assertEqual(dist_levenshtein('a', 'a'), 0)
        self.assertEqual(dist_levenshtein('ab', 'ab'), 0)
        self.assertEqual(dist_levenshtein('', 'a'), 1)
        self.assertEqual(dist_levenshtein('', 'ab'), 1)
        self.assertEqual(dist_levenshtein('a', 'c'), 1)

        # Fractional results: compared approximately, not exactly.
        self.assertAlmostEqual(dist_levenshtein('abc', 'ac'), 1/3)
        self.assertAlmostEqual(dist_levenshtein('abbc', 'ac'), 1/2)
        self.assertAlmostEqual(dist_levenshtein('abbc', 'abc'), 1/4)

    def test_sim_levenshtein(self):
        """Test abydos.distance.sim_levenshtein."""
        self.assertEqual(sim_levenshtein('', ''), 1)

        self.assertEqual(sim_levenshtein('a', 'a'), 1)
        self.assertEqual(sim_levenshtein('ab', 'ab'), 1)
        self.assertEqual(sim_levenshtein('', 'a'), 0)
        self.assertEqual(sim_levenshtein('', 'ab'), 0)
        self.assertEqual(sim_levenshtein('a', 'c'), 0)

        # Expected values are the complements of the dist_levenshtein cases.
        self.assertAlmostEqual(sim_levenshtein('abc', 'ac'), 2/3)
        self.assertAlmostEqual(sim_levenshtein('abbc', 'ac'), 1/2)
        self.assertAlmostEqual(sim_levenshtein('abbc', 'abc'), 3/4)

    def test_damerau_levenshtein(self):
        """Test abydos.distance.damerau_levenshtein."""
        self.assertEqual(damerau_levenshtein('', ''), 0)
        self.assertEqual(damerau_levenshtein('CA', 'CA'), 0)
        self.assertEqual(damerau_levenshtein('CA', 'ABC'), 2)
        self.assertEqual(damerau_levenshtein('', 'b', cost=(5, 7, 10, 10)), 5)
        self.assertEqual(damerau_levenshtein('a', 'ab', cost=(5, 7, 10, 10)),
                         5)
        self.assertEqual(damerau_levenshtein('b', '', cost=(5, 7, 10, 10)), 7)
        self.assertEqual(damerau_levenshtein('ab', 'a', cost=(5, 7, 10, 10)),
                         7)
        self.assertEqual(damerau_levenshtein('a', 'b', cost=(10, 10, 5, 10)),
                         5)
        self.assertEqual(damerau_levenshtein('ac', 'bc',
                                             cost=(10, 10, 5, 10)), 5)
        self.assertEqual(damerau_levenshtein('ab', 'ba',
                                             cost=(5, 5, 10, 5)), 5)
        self.assertEqual(damerau_levenshtein('abc', 'bac',
                                             cost=(5, 5, 10, 5)), 5)
        self.assertEqual(damerau_levenshtein('cab', 'cba',
                                             cost=(5, 5, 10, 5)), 5)
        # This cost tuple is expected to be rejected.
        self.assertRaises(ValueError, damerau_levenshtein, 'ab', 'ba',
                          cost=(10, 10, 10, 5))

    def test_dist_damerau(self):
        """Test abydos.distance.dist_damerau."""
        self.assertEqual(dist_damerau('', ''), 0)

        self.assertEqual(dist_damerau('a', 'a'), 0)
        self.assertEqual(dist_damerau('ab', 'ab'), 0)
        self.assertEqual(dist_damerau('', 'a'), 1)
        self.assertEqual(dist_damerau('', 'ab'), 1)
        self.assertEqual(dist_damerau('a', 'c'), 1)

        self.assertAlmostEqual(dist_damerau('abc', 'ac'), 1/3)
        self.assertAlmostEqual(dist_damerau('abbc', 'ac'), 1/2)
        self.assertAlmostEqual(dist_damerau('abbc', 'abc'), 1/4)

        # Same inputs and cost tuples as test_damerau_levenshtein, with
        # normalized expected values.
        self.assertAlmostEqual(dist_damerau('CA', 'ABC'), 2/3)
        self.assertAlmostEqual(dist_damerau('', 'b', cost=(5, 7, 10, 10)), 1)
        self.assertAlmostEqual(dist_damerau('a', 'ab',
                                            cost=(5, 7, 10, 10)), 1/2)
        self.assertAlmostEqual(dist_damerau('b', '', cost=(5, 7, 10, 10)), 1)
        self.assertAlmostEqual(dist_damerau('ab', 'a',
                                            cost=(5, 7, 10, 10)), 1/2)
        self.assertAlmostEqual(dist_damerau('a', 'b',
                                            cost=(10, 10, 5, 10)), 1/2)
        self.assertAlmostEqual(dist_damerau('ac', 'bc',
                                            cost=(10, 10, 5, 10)), 1/4)
        self.assertAlmostEqual(dist_damerau('ab', 'ba',
                                            cost=(5, 5, 10, 5)), 1/2)
        self.assertAlmostEqual(dist_damerau('abc', 'bac',
                                            cost=(5, 5, 10, 5)), 1/3)
        self.assertAlmostEqual(dist_damerau('cab', 'cba',
                                            cost=(5, 5, 10, 5)), 1/3)
        self.assertRaises(ValueError, dist_damerau, 'ab', 'ba',
                          cost=(10, 10, 10, 5))

    def test_sim_damerau(self):
        """Test abydos.distance.sim_damerau."""
        self.assertEqual(sim_damerau('', ''), 1)

        self.assertEqual(sim_damerau('a', 'a'), 1)
        self.assertEqual(sim_damerau('ab', 'ab'), 1)
        self.assertEqual(sim_damerau('', 'a'), 0)
        self.assertEqual(sim_damerau('', 'ab'), 0)
        self.assertEqual(sim_damerau('a', 'c'), 0)

        self.assertAlmostEqual(sim_damerau('abc', 'ac'), 2/3)
        self.assertAlmostEqual(sim_damerau('abbc', 'ac'), 1/2)
        self.assertAlmostEqual(sim_damerau('abbc', 'abc'), 3/4)

        # Expected values are the complements of the dist_damerau cases.
        self.assertAlmostEqual(sim_damerau('CA', 'ABC'), 1/3)
        self.assertAlmostEqual(sim_damerau('', 'b', cost=(5, 7, 10, 10)), 0)
        self.assertAlmostEqual(sim_damerau('a', 'ab', cost=(5, 7, 10, 10)),
                               1/2)
        self.assertAlmostEqual(sim_damerau('b', '', cost=(5, 7, 10, 10)), 0)
        self.assertAlmostEqual(sim_damerau('ab', 'a', cost=(5, 7, 10, 10)),
                               1/2)
        self.assertAlmostEqual(sim_damerau('a', 'b', cost=(10, 10, 5, 10)),
                               1/2)
        self.assertAlmostEqual(sim_damerau('ac', 'bc',
                                           cost=(10, 10, 5, 10)), 3/4)
        self.assertAlmostEqual(sim_damerau('ab', 'ba',
                                           cost=(5, 5, 10, 5)), 1/2)
        self.assertAlmostEqual(sim_damerau('abc', 'bac',
                                           cost=(5, 5, 10, 5)), 2/3)
        self.assertAlmostEqual(sim_damerau('cab', 'cba',
                                           cost=(5, 5, 10, 5)), 2/3)
        self.assertRaises(ValueError, sim_damerau, 'ab', 'ba',
                          cost=(10, 10, 10, 5))
282
283
284
class HammingTestCases(unittest.TestCase):
    """Test Hamming functions.

    abydos.distance.hamming, .dist_hamming, & .sim_hamming
    """

    def test_hamming(self):
        """Test abydos.distance.hamming."""
        # Calls passing False as the third argument are expected to raise
        # ValueError on unequal-length inputs (see the exception test);
        # without it, unequal lengths yield a numeric result.
        self.assertEqual(hamming('', ''), 0)
        self.assertEqual(hamming('', '', False), 0)

        self.assertEqual(hamming('a', ''), 1)
        self.assertEqual(hamming('a', 'a'), 0)
        self.assertEqual(hamming('a', 'a', False), 0)
        self.assertEqual(hamming('a', 'b'), 1)
        self.assertEqual(hamming('a', 'b', False), 1)
        self.assertEqual(hamming('abc', 'cba'), 2)
        self.assertEqual(hamming('abc', 'cba', False), 2)
        self.assertEqual(hamming('abc', ''), 3)
        self.assertEqual(hamming('bb', 'cbab'), 3)

        # test exception
        self.assertRaises(ValueError, hamming, 'ab', 'a', False)

        # https://en.wikipedia.org/wiki/Hamming_distance
        self.assertEqual(hamming('karolin', 'kathrin'), 3)
        self.assertEqual(hamming('karolin', 'kerstin'), 3)
        self.assertEqual(hamming('1011101', '1001001'), 2)
        self.assertEqual(hamming('2173896', '2233796'), 3)

    def test_dist_hamming(self):
        """Test abydos.distance.dist_hamming."""
        self.assertEqual(dist_hamming('', ''), 0)
        self.assertEqual(dist_hamming('', '', False), 0)

        self.assertEqual(dist_hamming('a', ''), 1)
        self.assertEqual(dist_hamming('a', 'a'), 0)
        self.assertEqual(dist_hamming('a', 'a', False), 0)
        self.assertEqual(dist_hamming('a', 'b'), 1)
        self.assertEqual(dist_hamming('a', 'b', False), 1)
        self.assertAlmostEqual(dist_hamming('abc', 'cba'), 2/3)
        self.assertAlmostEqual(dist_hamming('abc', 'cba', False), 2/3)
        self.assertEqual(dist_hamming('abc', ''), 1)
        self.assertAlmostEqual(dist_hamming('bb', 'cbab'), 3/4)

        # test exception
        self.assertRaises(ValueError, dist_hamming, 'ab', 'a', False)

        # https://en.wikipedia.org/wiki/Hamming_distance
        self.assertAlmostEqual(dist_hamming('karolin', 'kathrin'), 3/7)
        self.assertAlmostEqual(dist_hamming('karolin', 'kerstin'), 3/7)
        self.assertAlmostEqual(dist_hamming('1011101', '1001001'), 2/7)
        self.assertAlmostEqual(dist_hamming('2173896', '2233796'), 3/7)

    def test_sim_hamming(self):
        """Test abydos.distance.sim_hamming."""
        # Expected values are the complements of the dist_hamming cases.
        self.assertEqual(sim_hamming('', ''), 1)
        self.assertEqual(sim_hamming('', '', False), 1)

        self.assertEqual(sim_hamming('a', ''), 0)
        self.assertEqual(sim_hamming('a', 'a'), 1)
        self.assertEqual(sim_hamming('a', 'a', False), 1)
        self.assertEqual(sim_hamming('a', 'b'), 0)
        self.assertEqual(sim_hamming('a', 'b', False), 0)
        self.assertAlmostEqual(sim_hamming('abc', 'cba'), 1/3)
        self.assertAlmostEqual(sim_hamming('abc', 'cba', False), 1/3)
        self.assertEqual(sim_hamming('abc', ''), 0)
        self.assertAlmostEqual(sim_hamming('bb', 'cbab'), 1/4)

        # test exception
        self.assertRaises(ValueError, sim_hamming, 'ab', 'a', False)

        # https://en.wikipedia.org/wiki/Hamming_distance
        self.assertAlmostEqual(sim_hamming('karolin', 'kathrin'), 4/7)
        self.assertAlmostEqual(sim_hamming('karolin', 'kerstin'), 4/7)
        self.assertAlmostEqual(sim_hamming('1011101', '1001001'), 5/7)
        self.assertAlmostEqual(sim_hamming('2173896', '2233796'), 4/7)
361
362
363
# Sentence pair used by the "non-q-gram" test cases, which pass 0 as the
# q-gram length argument.
NONQ_FROM = 'The quick brown fox jumped over the lazy dog.'
NONQ_TO = 'That brown dog jumped over the fox.'
365
366
367
class TverskyIndexTestCases(unittest.TestCase):
    """Test Tversky functions.

    abydos.distance.sim_tversky & .dist_tversky
    """

    def test_sim_tversky(self):
        """Test abydos.distance.sim_tversky."""
        self.assertEqual(sim_tversky('', ''), 1)
        self.assertEqual(sim_tversky('nelson', ''), 0)
        self.assertEqual(sim_tversky('', 'neilsen'), 0)
        self.assertAlmostEqual(sim_tversky('nelson', 'neilsen'), 4/11)

        # Passing 2 as the third argument gives the same results as omitting
        # it.
        self.assertEqual(sim_tversky('', '', 2), 1)
        self.assertEqual(sim_tversky('nelson', '', 2), 0)
        self.assertEqual(sim_tversky('', 'neilsen', 2), 0)
        self.assertAlmostEqual(sim_tversky('nelson', 'neilsen', 2), 4/11)

        # test valid alpha & beta
        self.assertRaises(ValueError, sim_tversky, 'abcd', 'dcba', 2, -1, -1)
        self.assertRaises(ValueError, sim_tversky, 'abcd', 'dcba', 2, -1, 0)
        self.assertRaises(ValueError, sim_tversky, 'abcd', 'dcba', 2, 0, -1)

        # test empty QGrams
        self.assertAlmostEqual(sim_tversky('nelson', 'neilsen', 7), 0.0)

        # test unequal alpha & beta
        self.assertAlmostEqual(sim_tversky('niall', 'neal', 2, 2, 1), 3/11)
        self.assertAlmostEqual(sim_tversky('niall', 'neal', 2, 1, 2), 3/10)
        self.assertAlmostEqual(sim_tversky('niall', 'neal', 2, 2, 2), 3/13)

        # test bias parameter
        self.assertAlmostEqual(sim_tversky('niall', 'neal', 2, 1, 1, 0.5),
                               7/11)
        self.assertAlmostEqual(sim_tversky('niall', 'neal', 2, 2, 1, 0.5), 7/9)
        self.assertAlmostEqual(sim_tversky('niall', 'neal', 2, 1, 2, 0.5),
                               7/15)
        self.assertAlmostEqual(sim_tversky('niall', 'neal', 2, 2, 2, 0.5),
                               7/11)

        # supplied q-gram tests
        self.assertEqual(sim_tversky(QGrams(''), QGrams('')), 1)
        self.assertEqual(sim_tversky(QGrams('nelson'), QGrams('')), 0)
        self.assertEqual(sim_tversky(QGrams(''), QGrams('neilsen')), 0)
        self.assertAlmostEqual(sim_tversky(QGrams('nelson'),
                                           QGrams('neilsen')), 4/11)

        # non-q-gram tests
        self.assertEqual(sim_tversky('', '', 0), 1)
        self.assertEqual(sim_tversky('the quick', '', 0), 0)
        self.assertEqual(sim_tversky('', 'the quick', 0), 0)
        self.assertAlmostEqual(sim_tversky(NONQ_FROM, NONQ_TO, 0), 1/3)
        self.assertAlmostEqual(sim_tversky(NONQ_TO, NONQ_FROM, 0), 1/3)

    def test_dist_tversky(self):
        """Test abydos.distance.dist_tversky."""
        # Expected values are the complements of the sim_tversky cases.
        self.assertEqual(dist_tversky('', ''), 0)
        self.assertEqual(dist_tversky('nelson', ''), 1)
        self.assertEqual(dist_tversky('', 'neilsen'), 1)
        self.assertAlmostEqual(dist_tversky('nelson', 'neilsen'), 7/11)

        self.assertEqual(dist_tversky('', '', 2), 0)
        self.assertEqual(dist_tversky('nelson', '', 2), 1)
        self.assertEqual(dist_tversky('', 'neilsen', 2), 1)
        self.assertAlmostEqual(dist_tversky('nelson', 'neilsen', 2), 7/11)

        # test valid alpha & beta
        self.assertRaises(ValueError, dist_tversky, 'abcd', 'dcba', 2, -1, -1)
        self.assertRaises(ValueError, dist_tversky, 'abcd', 'dcba', 2, -1, 0)
        self.assertRaises(ValueError, dist_tversky, 'abcd', 'dcba', 2, 0, -1)

        # test empty QGrams
        self.assertAlmostEqual(dist_tversky('nelson', 'neilsen', 7), 1.0)

        # test unequal alpha & beta
        self.assertAlmostEqual(dist_tversky('niall', 'neal', 2, 2, 1), 8/11)
        self.assertAlmostEqual(dist_tversky('niall', 'neal', 2, 1, 2), 7/10)
        self.assertAlmostEqual(dist_tversky('niall', 'neal', 2, 2, 2), 10/13)

        # test bias parameter
        self.assertAlmostEqual(dist_tversky('niall', 'neal', 2, 1, 1, 0.5),
                               4/11)
        self.assertAlmostEqual(dist_tversky('niall', 'neal', 2, 2, 1, 0.5),
                               2/9)
        self.assertAlmostEqual(dist_tversky('niall', 'neal', 2, 1, 2, 0.5),
                               8/15)
        self.assertAlmostEqual(dist_tversky('niall', 'neal', 2, 2, 2, 0.5),
                               4/11)

        # supplied q-gram tests
        self.assertEqual(dist_tversky(QGrams(''), QGrams('')), 0)
        self.assertEqual(dist_tversky(QGrams('nelson'), QGrams('')), 1)
        self.assertEqual(dist_tversky(QGrams(''), QGrams('neilsen')), 1)
        self.assertAlmostEqual(dist_tversky(QGrams('nelson'),
                                            QGrams('neilsen')), 7/11)

        # non-q-gram tests
        self.assertEqual(dist_tversky('', '', 0), 0)
        self.assertEqual(dist_tversky('the quick', '', 0), 1)
        self.assertEqual(dist_tversky('', 'the quick', 0), 1)
        self.assertAlmostEqual(dist_tversky(NONQ_FROM, NONQ_TO, 0), 2/3)
        self.assertAlmostEqual(dist_tversky(NONQ_TO, NONQ_FROM, 0), 2/3)
469
470
471
class DiceTestCases(unittest.TestCase):
    """Test Dice functions.

    abydos.distance.sim_dice & .dist_dice
    """

    def test_sim_dice(self):
        """Test abydos.distance.sim_dice."""
        self.assertEqual(sim_dice('', ''), 1)
        self.assertEqual(sim_dice('nelson', ''), 0)
        self.assertEqual(sim_dice('', 'neilsen'), 0)
        self.assertAlmostEqual(sim_dice('nelson', 'neilsen'), 8/15)

        # Passing 2 as the third argument gives the same results as omitting
        # it.
        self.assertEqual(sim_dice('', '', 2), 1)
        self.assertEqual(sim_dice('nelson', '', 2), 0)
        self.assertEqual(sim_dice('', 'neilsen', 2), 0)
        self.assertAlmostEqual(sim_dice('nelson', 'neilsen', 2), 8/15)

        # supplied q-gram tests
        self.assertEqual(sim_dice(QGrams(''), QGrams('')), 1)
        self.assertEqual(sim_dice(QGrams('nelson'), QGrams('')), 0)
        self.assertEqual(sim_dice(QGrams(''), QGrams('neilsen')), 0)
        self.assertAlmostEqual(sim_dice(QGrams('nelson'), QGrams('neilsen')),
                               8/15)

        # non-q-gram tests
        self.assertEqual(sim_dice('', '', 0), 1)
        self.assertEqual(sim_dice('the quick', '', 0), 0)
        self.assertEqual(sim_dice('', 'the quick', 0), 0)
        self.assertAlmostEqual(sim_dice(NONQ_FROM, NONQ_TO, 0), 1/2)
        self.assertAlmostEqual(sim_dice(NONQ_TO, NONQ_FROM, 0), 1/2)

    def test_dist_dice(self):
        """Test abydos.distance.dist_dice."""
        # Expected values are the complements of the sim_dice cases.
        self.assertEqual(dist_dice('', ''), 0)
        self.assertEqual(dist_dice('nelson', ''), 1)
        self.assertEqual(dist_dice('', 'neilsen'), 1)
        self.assertAlmostEqual(dist_dice('nelson', 'neilsen'), 7/15)

        self.assertEqual(dist_dice('', '', 2), 0)
        self.assertEqual(dist_dice('nelson', '', 2), 1)
        self.assertEqual(dist_dice('', 'neilsen', 2), 1)
        self.assertAlmostEqual(dist_dice('nelson', 'neilsen', 2), 7/15)

        # supplied q-gram tests
        self.assertEqual(dist_dice(QGrams(''), QGrams('')), 0)
        self.assertEqual(dist_dice(QGrams('nelson'), QGrams('')), 1)
        self.assertEqual(dist_dice(QGrams(''), QGrams('neilsen')), 1)
        self.assertAlmostEqual(dist_dice(QGrams('nelson'), QGrams('neilsen')),
                               7/15)

        # non-q-gram tests
        self.assertEqual(dist_dice('', '', 0), 0)
        self.assertEqual(dist_dice('the quick', '', 0), 1)
        self.assertEqual(dist_dice('', 'the quick', 0), 1)
        self.assertAlmostEqual(dist_dice(NONQ_FROM, NONQ_TO, 0), 1/2)
        self.assertAlmostEqual(dist_dice(NONQ_TO, NONQ_FROM, 0), 1/2)
528
529
530
class JaccardTestCases(unittest.TestCase):
    """Test Jaccard functions.

    abydos.distance.sim_jaccard & .dist_jaccard
    """

    def test_sim_jaccard(self):
        """Test abydos.distance.sim_jaccard."""
        self.assertEqual(sim_jaccard('', ''), 1)
        self.assertEqual(sim_jaccard('nelson', ''), 0)
        self.assertEqual(sim_jaccard('', 'neilsen'), 0)
        self.assertAlmostEqual(sim_jaccard('nelson', 'neilsen'), 4/11)

        # Passing 2 as the third argument gives the same results as omitting
        # it.
        self.assertEqual(sim_jaccard('', '', 2), 1)
        self.assertEqual(sim_jaccard('nelson', '', 2), 0)
        self.assertEqual(sim_jaccard('', 'neilsen', 2), 0)
        self.assertAlmostEqual(sim_jaccard('nelson', 'neilsen', 2), 4/11)

        # supplied q-gram tests
        self.assertEqual(sim_jaccard(QGrams(''), QGrams('')), 1)
        self.assertEqual(sim_jaccard(QGrams('nelson'), QGrams('')), 0)
        self.assertEqual(sim_jaccard(QGrams(''), QGrams('neilsen')), 0)
        self.assertAlmostEqual(sim_jaccard(QGrams('nelson'),
                                           QGrams('neilsen')), 4/11)

        # non-q-gram tests
        self.assertEqual(sim_jaccard('', '', 0), 1)
        self.assertEqual(sim_jaccard('the quick', '', 0), 0)
        self.assertEqual(sim_jaccard('', 'the quick', 0), 0)
        self.assertAlmostEqual(sim_jaccard(NONQ_FROM, NONQ_TO, 0), 1/3)
        self.assertAlmostEqual(sim_jaccard(NONQ_TO, NONQ_FROM, 0), 1/3)

    def test_dist_jaccard(self):
        """Test abydos.distance.dist_jaccard."""
        # Expected values are the complements of the sim_jaccard cases.
        self.assertEqual(dist_jaccard('', ''), 0)
        self.assertEqual(dist_jaccard('nelson', ''), 1)
        self.assertEqual(dist_jaccard('', 'neilsen'), 1)
        self.assertAlmostEqual(dist_jaccard('nelson', 'neilsen'), 7/11)

        self.assertEqual(dist_jaccard('', '', 2), 0)
        self.assertEqual(dist_jaccard('nelson', '', 2), 1)
        self.assertEqual(dist_jaccard('', 'neilsen', 2), 1)
        self.assertAlmostEqual(dist_jaccard('nelson', 'neilsen', 2), 7/11)

        # supplied q-gram tests
        self.assertEqual(dist_jaccard(QGrams(''), QGrams('')), 0)
        self.assertEqual(dist_jaccard(QGrams('nelson'), QGrams('')), 1)
        self.assertEqual(dist_jaccard(QGrams(''), QGrams('neilsen')), 1)
        self.assertAlmostEqual(dist_jaccard(QGrams('nelson'),
                                            QGrams('neilsen')), 7/11)

        # non-q-gram tests
        self.assertEqual(dist_jaccard('', '', 0), 0)
        self.assertEqual(dist_jaccard('the quick', '', 0), 1)
        self.assertEqual(dist_jaccard('', 'the quick', 0), 1)
        self.assertAlmostEqual(dist_jaccard(NONQ_FROM, NONQ_TO, 0), 2/3)
        self.assertAlmostEqual(dist_jaccard(NONQ_TO, NONQ_FROM, 0), 2/3)
587
588
589
class OverlapTestCases(unittest.TestCase):
    """Test overlap functions.

    abydos.distance.sim_overlap & .dist_overlap
    """

    def test_sim_overlap(self):
        """Test abydos.distance.sim_overlap."""
        self.assertEqual(sim_overlap('', ''), 1)
        self.assertEqual(sim_overlap('nelson', ''), 0)
        self.assertEqual(sim_overlap('', 'neilsen'), 0)
        self.assertAlmostEqual(sim_overlap('nelson', 'neilsen'), 4/7)

        # Passing 2 as the third argument gives the same results as omitting
        # it.
        self.assertEqual(sim_overlap('', '', 2), 1)
        self.assertEqual(sim_overlap('nelson', '', 2), 0)
        self.assertEqual(sim_overlap('', 'neilsen', 2), 0)
        self.assertAlmostEqual(sim_overlap('nelson', 'neilsen', 2), 4/7)

        # supplied q-gram tests
        self.assertEqual(sim_overlap(QGrams(''), QGrams('')), 1)
        self.assertEqual(sim_overlap(QGrams('nelson'), QGrams('')), 0)
        self.assertEqual(sim_overlap(QGrams(''), QGrams('neilsen')), 0)
        self.assertAlmostEqual(sim_overlap(QGrams('nelson'),
                                           QGrams('neilsen')), 4/7)

        # non-q-gram tests
        self.assertEqual(sim_overlap('', '', 0), 1)
        self.assertEqual(sim_overlap('the quick', '', 0), 0)
        self.assertEqual(sim_overlap('', 'the quick', 0), 0)
        self.assertAlmostEqual(sim_overlap(NONQ_FROM, NONQ_TO, 0), 4/7)
        self.assertAlmostEqual(sim_overlap(NONQ_TO, NONQ_FROM, 0), 4/7)

    def test_dist_overlap(self):
        """Test abydos.distance.dist_overlap."""
        # Expected values are the complements of the sim_overlap cases.
        self.assertEqual(dist_overlap('', ''), 0)
        self.assertEqual(dist_overlap('nelson', ''), 1)
        self.assertEqual(dist_overlap('', 'neilsen'), 1)
        self.assertAlmostEqual(dist_overlap('nelson', 'neilsen'), 3/7)

        self.assertEqual(dist_overlap('', '', 2), 0)
        self.assertEqual(dist_overlap('nelson', '', 2), 1)
        self.assertEqual(dist_overlap('', 'neilsen', 2), 1)
        self.assertAlmostEqual(dist_overlap('nelson', 'neilsen', 2), 3/7)

        # supplied q-gram tests
        self.assertEqual(dist_overlap(QGrams(''), QGrams('')), 0)
        self.assertEqual(dist_overlap(QGrams('nelson'), QGrams('')), 1)
        self.assertEqual(dist_overlap(QGrams(''), QGrams('neilsen')), 1)
        self.assertAlmostEqual(dist_overlap(QGrams('nelson'),
                                            QGrams('neilsen')), 3/7)

        # non-q-gram tests
        self.assertEqual(dist_overlap('', '', 0), 0)
        self.assertEqual(dist_overlap('the quick', '', 0), 1)
        self.assertEqual(dist_overlap('', 'the quick', 0), 1)
        self.assertAlmostEqual(dist_overlap(NONQ_FROM, NONQ_TO, 0), 3/7)
        self.assertAlmostEqual(dist_overlap(NONQ_TO, NONQ_FROM, 0), 3/7)
646
647
648
class TanimotoTestCases(unittest.TestCase):
    """Exercise the Tanimoto measures.

    Targets abydos.distance.sim_tanimoto and abydos.distance.tanimoto.
    """

    def test_tanimoto_coeff(self):
        """Check abydos.distance.sim_tanimoto against known values."""
        equal = self.assertEqual
        close = self.assertAlmostEqual

        # identical expectations without and with a trailing positional 2
        for extra in ((), (2,)):
            equal(sim_tanimoto('', '', *extra), 1)
            equal(sim_tanimoto('nelson', '', *extra), 0)
            equal(sim_tanimoto('', 'neilsen', *extra), 0)
            close(sim_tanimoto('nelson', 'neilsen', *extra), 4/11)

        # supplied q-gram tests
        equal(sim_tanimoto(QGrams(''), QGrams('')), 1)
        equal(sim_tanimoto(QGrams('nelson'), QGrams('')), 0)
        equal(sim_tanimoto(QGrams(''), QGrams('neilsen')), 0)
        close(sim_tanimoto(QGrams('nelson'), QGrams('neilsen')), 4/11)

        # non-q-gram tests
        equal(sim_tanimoto('', '', 0), 1)
        equal(sim_tanimoto('the quick', '', 0), 0)
        equal(sim_tanimoto('', 'the quick', 0), 0)
        for src, tar in ((NONQ_FROM, NONQ_TO), (NONQ_TO, NONQ_FROM)):
            close(sim_tanimoto(src, tar, 0), 1/3)

    def test_tanimoto(self):
        """Check abydos.distance.tanimoto against known values.

        The expected values for the non-empty pairs are the base-2 logarithms
        of the values expected from sim_tanimoto in test_tanimoto_coeff.
        """
        equal = self.assertEqual
        close = self.assertAlmostEqual
        neg_inf = float('-inf')

        # identical expectations without and with a trailing positional 2
        for extra in ((), (2,)):
            equal(tanimoto('', '', *extra), 0)
            equal(tanimoto('nelson', '', *extra), neg_inf)
            equal(tanimoto('', 'neilsen', *extra), neg_inf)
            close(tanimoto('nelson', 'neilsen', *extra), math.log(4/11, 2))

        # supplied q-gram tests
        equal(tanimoto(QGrams(''), QGrams('')), 0)
        equal(tanimoto(QGrams('nelson'), QGrams('')), neg_inf)
        equal(tanimoto(QGrams(''), QGrams('neilsen')), neg_inf)
        close(tanimoto(QGrams('nelson'), QGrams('neilsen')),
              math.log(4/11, 2))

        # non-q-gram tests
        equal(tanimoto('', '', 0), 0)
        equal(tanimoto('the quick', '', 0), neg_inf)
        equal(tanimoto('', 'the quick', 0), neg_inf)
        for src, tar in ((NONQ_FROM, NONQ_TO), (NONQ_TO, NONQ_FROM)):
            close(tanimoto(src, tar, 0), math.log(1/3, 2))
710
711
712
class MinkowskiTestCases(unittest.TestCase):
    """Exercise the Minkowski measures.

    Targets abydos.distance.minkowski, abydos.distance.sim_minkowski and
    abydos.distance.dist_minkowski.
    """

    def test_minkowski(self):
        """Check abydos.distance.minkowski against known values."""
        equal = self.assertEqual
        close = self.assertAlmostEqual

        # identical expectations without and with a trailing positional 2
        for extra in ((), (2,)):
            equal(minkowski('', '', *extra), 0)
            equal(minkowski('nelson', '', *extra), 7)
            equal(minkowski('', 'neilsen', *extra), 8)
            close(minkowski('nelson', 'neilsen', *extra), 7)

        # supplied q-gram tests
        equal(minkowski(QGrams(''), QGrams('')), 0)
        equal(minkowski(QGrams('nelson'), QGrams('')), 7)
        equal(minkowski(QGrams(''), QGrams('neilsen')), 8)
        close(minkowski(QGrams('nelson'), QGrams('neilsen')), 7)

        # non-q-gram tests
        equal(minkowski('', '', 0), 0)
        equal(minkowski('the quick', '', 0), 2)
        equal(minkowski('', 'the quick', 0), 2)
        for src, tar in ((NONQ_FROM, NONQ_TO), (NONQ_TO, NONQ_FROM)):
            close(minkowski(src, tar, 0), 8)

        # test l_0 "norm"
        for src, tar, expected in (('', '', 0),
                                   ('a', '', 1),
                                   ('a', 'b', 2),
                                   ('ab', 'b', 1),
                                   ('aab', 'b', 1)):
            equal(minkowski(src, tar, 1, 0), expected)
        # same call shape with an additional trailing positional True
        for src, tar, expected in (('', '', 0),
                                   ('a', '', 1),
                                   ('a', 'b', 1),
                                   ('ab', 'b', 1/2),
                                   ('aab', 'b', 1/2),
                                   ('aaab', 'b', 1/2),
                                   ('aaab', 'ab', 1/2)):
            equal(minkowski(src, tar, 1, 0, True), expected)

        # test with alphabet
        equal(minkowski('ab', 'b', 1, alphabet=26), 1)
        for alphabet in (26, 'abcdefghijklmnopqrstuvwxyz'):
            equal(minkowski('ab', 'b', 1, normalize=True, alphabet=alphabet),
                  1/26)

    def test_sim_minkowski(self):
        """Check abydos.distance.sim_minkowski against known values."""
        equal = self.assertEqual
        close = self.assertAlmostEqual

        # identical expectations without and with a trailing positional 2
        for extra in ((), (2,)):
            equal(sim_minkowski('', '', *extra), 1)
            equal(sim_minkowski('nelson', '', *extra), 0)
            equal(sim_minkowski('', 'neilsen', *extra), 0)
            close(sim_minkowski('nelson', 'neilsen', *extra), 8/15)

        # supplied q-gram tests
        equal(sim_minkowski(QGrams(''), QGrams('')), 1)
        equal(sim_minkowski(QGrams('nelson'), QGrams('')), 0)
        equal(sim_minkowski(QGrams(''), QGrams('neilsen')), 0)
        close(sim_minkowski(QGrams('nelson'), QGrams('neilsen')), 8/15)

        # non-q-gram tests
        equal(sim_minkowski('', '', 0), 1)
        equal(sim_minkowski('the quick', '', 0), 0)
        equal(sim_minkowski('', 'the quick', 0), 0)
        for src, tar in ((NONQ_FROM, NONQ_TO), (NONQ_TO, NONQ_FROM)):
            close(sim_minkowski(src, tar, 0), 1/2)

    def test_dist_minkowski(self):
        """Check abydos.distance.dist_minkowski against known values."""
        equal = self.assertEqual
        close = self.assertAlmostEqual

        # identical expectations without and with a trailing positional 2
        for extra in ((), (2,)):
            equal(dist_minkowski('', '', *extra), 0)
            equal(dist_minkowski('nelson', '', *extra), 1)
            equal(dist_minkowski('', 'neilsen', *extra), 1)
            close(dist_minkowski('nelson', 'neilsen', *extra), 7/15)

        # supplied q-gram tests
        equal(dist_minkowski(QGrams(''), QGrams('')), 0)
        equal(dist_minkowski(QGrams('nelson'), QGrams('')), 1)
        equal(dist_minkowski(QGrams(''), QGrams('neilsen')), 1)
        close(dist_minkowski(QGrams('nelson'), QGrams('neilsen')), 7/15)

        # non-q-gram tests
        equal(dist_minkowski('', '', 0), 0)
        equal(dist_minkowski('the quick', '', 0), 1)
        equal(dist_minkowski('', 'the quick', 0), 1)
        for src, tar in ((NONQ_FROM, NONQ_TO), (NONQ_TO, NONQ_FROM)):
            close(dist_minkowski(src, tar, 0), 1/2)
817
818
819
class ManhattanTestCases(unittest.TestCase):
    """Exercise the Manhattan measures.

    Targets abydos.distance.manhattan, abydos.distance.sim_manhattan and
    abydos.distance.dist_manhattan.
    """

    def test_manhattan(self):
        """Check abydos.distance.manhattan against known values."""
        equal = self.assertEqual
        close = self.assertAlmostEqual

        # identical expectations without and with a trailing positional 2
        for extra in ((), (2,)):
            equal(manhattan('', '', *extra), 0)
            equal(manhattan('nelson', '', *extra), 7)
            equal(manhattan('', 'neilsen', *extra), 8)
            close(manhattan('nelson', 'neilsen', *extra), 7)

        # supplied q-gram tests
        equal(manhattan(QGrams(''), QGrams('')), 0)
        equal(manhattan(QGrams('nelson'), QGrams('')), 7)
        equal(manhattan(QGrams(''), QGrams('neilsen')), 8)
        close(manhattan(QGrams('nelson'), QGrams('neilsen')), 7)

        # non-q-gram tests
        equal(manhattan('', '', 0), 0)
        equal(manhattan('the quick', '', 0), 2)
        equal(manhattan('', 'the quick', 0), 2)
        for src, tar in ((NONQ_FROM, NONQ_TO), (NONQ_TO, NONQ_FROM)):
            close(manhattan(src, tar, 0), 8)

    def test_sim_manhattan(self):
        """Check abydos.distance.sim_manhattan against known values."""
        equal = self.assertEqual
        close = self.assertAlmostEqual

        # identical expectations without and with a trailing positional 2
        for extra in ((), (2,)):
            equal(sim_manhattan('', '', *extra), 1)
            equal(sim_manhattan('nelson', '', *extra), 0)
            equal(sim_manhattan('', 'neilsen', *extra), 0)
            close(sim_manhattan('nelson', 'neilsen', *extra), 8/15)

        # supplied q-gram tests
        equal(sim_manhattan(QGrams(''), QGrams('')), 1)
        equal(sim_manhattan(QGrams('nelson'), QGrams('')), 0)
        equal(sim_manhattan(QGrams(''), QGrams('neilsen')), 0)
        close(sim_manhattan(QGrams('nelson'), QGrams('neilsen')), 8/15)

        # non-q-gram tests
        equal(sim_manhattan('', '', 0), 1)
        equal(sim_manhattan('the quick', '', 0), 0)
        equal(sim_manhattan('', 'the quick', 0), 0)
        for src, tar in ((NONQ_FROM, NONQ_TO), (NONQ_TO, NONQ_FROM)):
            close(sim_manhattan(src, tar, 0), 1/2)

    def test_dist_manhattan(self):
        """Check abydos.distance.dist_manhattan against known values."""
        equal = self.assertEqual
        close = self.assertAlmostEqual

        # identical expectations without and with a trailing positional 2
        for extra in ((), (2,)):
            equal(dist_manhattan('', '', *extra), 0)
            equal(dist_manhattan('nelson', '', *extra), 1)
            equal(dist_manhattan('', 'neilsen', *extra), 1)
            close(dist_manhattan('nelson', 'neilsen', *extra), 7/15)

        # supplied q-gram tests
        equal(dist_manhattan(QGrams(''), QGrams('')), 0)
        equal(dist_manhattan(QGrams('nelson'), QGrams('')), 1)
        equal(dist_manhattan(QGrams(''), QGrams('neilsen')), 1)
        close(dist_manhattan(QGrams('nelson'), QGrams('neilsen')), 7/15)

        # non-q-gram tests
        equal(dist_manhattan('', '', 0), 0)
        equal(dist_manhattan('the quick', '', 0), 1)
        equal(dist_manhattan('', 'the quick', 0), 1)
        for src, tar in ((NONQ_FROM, NONQ_TO), (NONQ_TO, NONQ_FROM)):
            close(dist_manhattan(src, tar, 0), 1/2)
902
903
904
class EuclideanTestCases(unittest.TestCase):
    """Test Euclidean functions.

    abydos.distance.euclidean, sim_euclidean & .dist_euclidean

    Expected values that are square roots (or ratios of square roots) are
    always compared with assertAlmostEqual, so the tests do not depend on
    bit-for-bit identical floating-point rounding; exact integer
    expectations (0 and 1) still use assertEqual.
    """

    def test_euclidean(self):
        """Test abydos.distance.euclidean."""
        self.assertEqual(euclidean('', ''), 0)
        # square-root expectations: tolerate last-digit rounding differences
        self.assertAlmostEqual(euclidean('nelson', ''), 7**0.5)
        self.assertAlmostEqual(euclidean('', 'neilsen'), 8**0.5)
        self.assertAlmostEqual(euclidean('nelson', 'neilsen'), 7**0.5)

        self.assertEqual(euclidean('', '', 2), 0)
        self.assertAlmostEqual(euclidean('nelson', '', 2), 7**0.5)
        self.assertAlmostEqual(euclidean('', 'neilsen', 2), 8**0.5)
        self.assertAlmostEqual(euclidean('nelson', 'neilsen', 2), 7**0.5)

        # supplied q-gram tests
        self.assertEqual(euclidean(QGrams(''), QGrams('')), 0)
        self.assertAlmostEqual(euclidean(QGrams('nelson'), QGrams('')),
                               7**0.5)
        self.assertAlmostEqual(euclidean(QGrams(''), QGrams('neilsen')),
                               8**0.5)
        self.assertAlmostEqual(euclidean(QGrams('nelson'),
                                         QGrams('neilsen')), 7**0.5)

        # non-q-gram tests
        self.assertEqual(euclidean('', '', 0), 0)
        self.assertAlmostEqual(euclidean('the quick', '', 0), 2**0.5)
        self.assertAlmostEqual(euclidean('', 'the quick', 0), 2**0.5)
        self.assertAlmostEqual(euclidean(NONQ_FROM, NONQ_TO, 0), 8**0.5)
        self.assertAlmostEqual(euclidean(NONQ_TO, NONQ_FROM, 0), 8**0.5)

    def test_sim_euclidean(self):
        """Test abydos.distance.sim_euclidean."""
        self.assertEqual(sim_euclidean('', ''), 1)
        self.assertEqual(sim_euclidean('nelson', ''), 0)
        self.assertEqual(sim_euclidean('', 'neilsen'), 0)
        self.assertAlmostEqual(sim_euclidean('nelson', 'neilsen'),
                               1-7**0.5/23**0.5)

        self.assertEqual(sim_euclidean('', '', 2), 1)
        self.assertEqual(sim_euclidean('nelson', '', 2), 0)
        self.assertEqual(sim_euclidean('', 'neilsen', 2), 0)
        self.assertAlmostEqual(sim_euclidean('nelson', 'neilsen', 2),
                               1-7**0.5/23**0.5)

        # supplied q-gram tests
        self.assertEqual(sim_euclidean(QGrams(''), QGrams('')), 1)
        self.assertEqual(sim_euclidean(QGrams('nelson'), QGrams('')), 0)
        self.assertEqual(sim_euclidean(QGrams(''), QGrams('neilsen')), 0)
        self.assertAlmostEqual(sim_euclidean(QGrams('nelson'),
                                             QGrams('neilsen')),
                               1-7**0.5/23**0.5)

        # non-q-gram tests
        self.assertEqual(sim_euclidean('', '', 0), 1)
        self.assertEqual(sim_euclidean('the quick', '', 0), 0)
        self.assertEqual(sim_euclidean('', 'the quick', 0), 0)
        self.assertAlmostEqual(sim_euclidean(NONQ_FROM, NONQ_TO, 0),
                               1-8**0.5/24**0.5)
        self.assertAlmostEqual(sim_euclidean(NONQ_TO, NONQ_FROM, 0),
                               1-8**0.5/24**0.5)

    def test_dist_euclidean(self):
        """Test abydos.distance.dist_euclidean."""
        self.assertEqual(dist_euclidean('', ''), 0)
        self.assertEqual(dist_euclidean('nelson', ''), 1)
        self.assertEqual(dist_euclidean('', 'neilsen'), 1)
        self.assertAlmostEqual(dist_euclidean('nelson', 'neilsen'),
                               7**0.5 / 23**0.5)

        self.assertEqual(dist_euclidean('', '', 2), 0)
        self.assertEqual(dist_euclidean('nelson', '', 2), 1)
        self.assertEqual(dist_euclidean('', 'neilsen', 2), 1)
        self.assertAlmostEqual(dist_euclidean('nelson', 'neilsen', 2),
                               7**0.5 / 23**0.5)

        # supplied q-gram tests
        self.assertEqual(dist_euclidean(QGrams(''), QGrams('')), 0)
        self.assertEqual(dist_euclidean(QGrams('nelson'), QGrams('')), 1)
        self.assertEqual(dist_euclidean(QGrams(''), QGrams('neilsen')), 1)
        self.assertAlmostEqual(dist_euclidean(QGrams('nelson'),
                                              QGrams('neilsen')),
                               7**0.5 / 23**0.5)

        # non-q-gram tests
        self.assertEqual(dist_euclidean('', '', 0), 0)
        self.assertEqual(dist_euclidean('the quick', '', 0), 1)
        self.assertEqual(dist_euclidean('', 'the quick', 0), 1)
        self.assertAlmostEqual(dist_euclidean(NONQ_FROM, NONQ_TO, 0),
                               8**0.5/24**0.5)
        self.assertAlmostEqual(dist_euclidean(NONQ_TO, NONQ_FROM, 0),
                               8**0.5/24**0.5)
996
997
998
class ChebyshevTestCases(unittest.TestCase):
    """Exercise the Chebyshev measures.

    Targets abydos.distance.chebyshev, abydos.distance.sim_chebyshev and
    abydos.distance.dist_chebyshev.
    """

    def test_chebyshev(self):
        """Check abydos.distance.chebyshev against known values."""
        equal = self.assertEqual
        close = self.assertAlmostEqual

        # Without a third argument the last pair is compared exactly; with a
        # trailing positional 2 it is compared approximately.
        for extra, last_check in (((), equal), ((2,), close)):
            equal(chebyshev('', '', *extra), 0)
            equal(chebyshev('nelson', '', *extra), 1)
            equal(chebyshev('', 'neilsen', *extra), 1)
            last_check(chebyshev('nelson', 'neilsen', *extra), 1)

        # supplied q-gram tests
        equal(chebyshev(QGrams(''), QGrams('')), 0)
        equal(chebyshev(QGrams('nelson'), QGrams('')), 1)
        equal(chebyshev(QGrams(''), QGrams('neilsen')), 1)
        close(chebyshev(QGrams('nelson'), QGrams('neilsen')), 1)

        # non-q-gram tests
        equal(chebyshev('', '', 0), 0)
        equal(chebyshev('the quick', '', 0), 1)
        equal(chebyshev('', 'the quick', 0), 1)
        for src, tar in ((NONQ_FROM, NONQ_TO), (NONQ_TO, NONQ_FROM)):
            close(chebyshev(src, tar, 0), 1)

    def test_sim_chebyshev(self):
        """Check abydos.distance.sim_chebyshev against known values."""
        equal = self.assertEqual
        close = self.assertAlmostEqual

        # Without a third argument the last pair is compared exactly; with a
        # trailing positional 2 it is compared approximately.
        for extra, last_check in (((), equal), ((2,), close)):
            equal(sim_chebyshev('', '', *extra), 1)
            equal(sim_chebyshev('nelson', '', *extra), 0)
            equal(sim_chebyshev('', 'neilsen', *extra), 0)
            last_check(sim_chebyshev('nelson', 'neilsen', *extra), 0)

        # supplied q-gram tests
        equal(sim_chebyshev(QGrams(''), QGrams('')), 1)
        equal(sim_chebyshev(QGrams('nelson'), QGrams('')), 0)
        equal(sim_chebyshev(QGrams(''), QGrams('neilsen')), 0)
        close(sim_chebyshev(QGrams('nelson'), QGrams('neilsen')), 0)

        # non-q-gram tests
        equal(sim_chebyshev('', '', 0), 1)
        equal(sim_chebyshev('the quick', '', 0), 0)
        equal(sim_chebyshev('', 'the quick', 0), 0)
        for src, tar in ((NONQ_FROM, NONQ_TO), (NONQ_TO, NONQ_FROM)):
            close(sim_chebyshev(src, tar, 0), 0)

    def test_dist_chebyshev(self):
        """Check abydos.distance.dist_chebyshev against known values."""
        equal = self.assertEqual
        close = self.assertAlmostEqual

        # Without a third argument the last pair is compared exactly; with a
        # trailing positional 2 it is compared approximately.
        for extra, last_check in (((), equal), ((2,), close)):
            equal(dist_chebyshev('', '', *extra), 0)
            equal(dist_chebyshev('nelson', '', *extra), 1)
            equal(dist_chebyshev('', 'neilsen', *extra), 1)
            last_check(dist_chebyshev('nelson', 'neilsen', *extra), 1)

        # supplied q-gram tests
        equal(dist_chebyshev(QGrams(''), QGrams('')), 0)
        equal(dist_chebyshev(QGrams('nelson'), QGrams('')), 1)
        equal(dist_chebyshev(QGrams(''), QGrams('neilsen')), 1)
        close(dist_chebyshev(QGrams('nelson'), QGrams('neilsen')), 1)

        # non-q-gram tests
        equal(dist_chebyshev('', '', 0), 0)
        equal(dist_chebyshev('the quick', '', 0), 1)
        equal(dist_chebyshev('', 'the quick', 0), 1)
        for src, tar in ((NONQ_FROM, NONQ_TO), (NONQ_TO, NONQ_FROM)):
            close(dist_chebyshev(src, tar, 0), 1)
1081
1082
1083
class CosineSimilarityTestCases(unittest.TestCase):
    """Exercise the cosine measures.

    Targets abydos.distance.sim_cosine and abydos.distance.dist_cosine.
    """

    def test_sim_cosine(self):
        """Check abydos.distance.sim_cosine against known values."""
        equal = self.assertEqual
        close = self.assertAlmostEqual

        # identical expectations without and with a trailing positional 2
        for extra in ((), (2,)):
            equal(sim_cosine('', '', *extra), 1)
            equal(sim_cosine('nelson', '', *extra), 0)
            equal(sim_cosine('', 'neilsen', *extra), 0)
            close(sim_cosine('nelson', 'neilsen', *extra),
                  4/math.sqrt(7*8))

        # supplied q-gram tests
        equal(sim_cosine(QGrams(''), QGrams('')), 1)
        equal(sim_cosine(QGrams('nelson'), QGrams('')), 0)
        equal(sim_cosine(QGrams(''), QGrams('neilsen')), 0)
        close(sim_cosine(QGrams('nelson'), QGrams('neilsen')),
              4/math.sqrt(7*8))

        # non-q-gram tests
        equal(sim_cosine('', '', 0), 1)
        equal(sim_cosine('the quick', '', 0), 0)
        equal(sim_cosine('', 'the quick', 0), 0)
        for src, tar in ((NONQ_FROM, NONQ_TO), (NONQ_TO, NONQ_FROM)):
            close(sim_cosine(src, tar, 0), 4/math.sqrt(9*7))

    def test_dist_cosine(self):
        """Check abydos.distance.dist_cosine against known values."""
        equal = self.assertEqual
        close = self.assertAlmostEqual

        # identical expectations without and with a trailing positional 2
        for extra in ((), (2,)):
            equal(dist_cosine('', '', *extra), 0)
            equal(dist_cosine('nelson', '', *extra), 1)
            equal(dist_cosine('', 'neilsen', *extra), 1)
            close(dist_cosine('nelson', 'neilsen', *extra),
                  1-(4/math.sqrt(7*8)))

        # supplied q-gram tests
        equal(dist_cosine(QGrams(''), QGrams('')), 0)
        equal(dist_cosine(QGrams('nelson'), QGrams('')), 1)
        equal(dist_cosine(QGrams(''), QGrams('neilsen')), 1)
        close(dist_cosine(QGrams('nelson'), QGrams('neilsen')),
              1-(4/math.sqrt(7*8)))

        # non-q-gram tests
        equal(dist_cosine('', '', 0), 0)
        equal(dist_cosine('the quick', '', 0), 1)
        equal(dist_cosine('', 'the quick', 0), 1)
        for src, tar in ((NONQ_FROM, NONQ_TO), (NONQ_TO, NONQ_FROM)):
            close(dist_cosine(src, tar, 0), 1-4/math.sqrt(9*7))
1149
1150
1151
class JaroWinklerTestCases(unittest.TestCase):
    """Exercise the strcmp95 and Jaro(-Winkler) measures.

    Targets abydos.distance.sim_strcmp95, abydos.distance.dist_strcmp95,
    abydos.distance.sim_jaro_winkler and abydos.distance.dist_jaro_winkler.
    """

    def test_sim_strcmp95(self):
        """Check abydos.distance.sim_strcmp95 against known values."""
        equal = self.assertEqual
        close = self.assertAlmostEqual

        for src, tar, expected in (('', '', 1),
                                   ('MARTHA', '', 0),
                                   ('', 'MARTHA', 0),
                                   ('MARTHA', 'MARTHA', 1)):
            equal(sim_strcmp95(src, tar), expected)

        for src, tar, expected in (('MARTHA', 'MARHTA', 0.96111111),
                                   ('DWAYNE', 'DUANE', 0.873),
                                   ('DIXON', 'DICKSONX', 0.839333333),
                                   ('ABCD', 'EFGH', 0.0)):
            close(sim_strcmp95(src, tar), expected)

        # long_strings = True
        for src, tar, expected in (('DIXON', 'DICKSONX', 0.85393939),
                                   ('DWAYNE', 'DUANE', 0.89609090),
                                   ('MARTHA', 'MARHTA', 0.97083333)):
            close(sim_strcmp95(src, tar, True), expected)

    def test_dist_strcmp95(self):
        """Check abydos.distance.dist_strcmp95 against known values."""
        equal = self.assertEqual
        close = self.assertAlmostEqual

        for src, tar, expected in (('', '', 0),
                                   ('MARTHA', '', 1),
                                   ('', 'MARTHA', 1),
                                   ('MARTHA', 'MARTHA', 0)):
            equal(dist_strcmp95(src, tar), expected)

        for src, tar, expected in (('MARTHA', 'MARHTA', 0.03888888),
                                   ('DWAYNE', 'DUANE', 0.127),
                                   ('DIXON', 'DICKSONX', 0.160666666),
                                   ('ABCD', 'EFGH', 1.0)):
            close(dist_strcmp95(src, tar), expected)

    def test_sim_jaro_winkler(self):
        """Check abydos.distance.sim_jaro_winkler against known values.

        Also checks that the listed boost_threshold and scaling_factor
        values raise ValueError.
        """
        equal = self.assertEqual
        close = self.assertAlmostEqual

        # each pair is checked in 'jaro' mode first, then 'winkler' mode
        for src, tar, expected in (('', '', 1),
                                   ('MARTHA', '', 0),
                                   ('', 'MARHTA', 0),
                                   ('MARTHA', 'MARTHA', 1)):
            for mode in ('jaro', 'winkler'):
                equal(sim_jaro_winkler(src, tar, mode=mode), expected)

        # https://en.wikipedia.org/wiki/Jaro-Winkler_distance
        for src, tar, jaro, winkler in (
                ('MARTHA', 'MARHTA', 0.94444444, 0.96111111),
                ('DWAYNE', 'DUANE', 0.82222222, 0.84),
                ('DIXON', 'DICKSONX', 0.76666666, 0.81333333)):
            close(sim_jaro_winkler(src, tar, mode='jaro'), jaro)
            close(sim_jaro_winkler(src, tar, mode='winkler'), winkler)

        # keyword values expected to be rejected with ValueError
        for rejected in (dict(boost_threshold=2),
                         dict(boost_threshold=-1),
                         dict(scaling_factor=0.3),
                         dict(scaling_factor=-1)):
            self.assertRaises(ValueError, sim_jaro_winkler, 'abcd', 'dcba',
                              **rejected)

        close(sim_jaro_winkler('ABCD', 'EFGH'), 0.0)

        # long_strings = True (applies only to Jaro-Winkler, not Jaro)
        equal(sim_jaro_winkler('ABCD', 'EFGH', long_strings=True),
              sim_jaro_winkler('ABCD', 'EFGH'))
        equal(sim_jaro_winkler('DIXON', 'DICKSONX', mode='jaro',
                               long_strings=True),
              sim_jaro_winkler('DIXON', 'DICKSONX', mode='jaro'))
        for src, tar, expected in (('DIXON', 'DICKSONX', 0.83030303),
                                   ('MARTHA', 'MARHTA', 0.97083333)):
            close(sim_jaro_winkler(src, tar, mode='winkler',
                                   long_strings=True), expected)

    def test_dist_jaro_winkler(self):
        """Check abydos.distance.dist_jaro_winkler against known values.

        Also checks that the listed boost_threshold and scaling_factor
        values raise ValueError.
        """
        equal = self.assertEqual
        close = self.assertAlmostEqual

        # each pair is checked in 'jaro' mode first, then 'winkler' mode
        for src, tar, expected in (('', '', 0),
                                   ('MARTHA', '', 1),
                                   ('', 'MARHTA', 1),
                                   ('MARTHA', 'MARTHA', 0)):
            for mode in ('jaro', 'winkler'):
                equal(dist_jaro_winkler(src, tar, mode=mode), expected)

        # https://en.wikipedia.org/wiki/Jaro-Winkler_distance
        for src, tar, jaro, winkler in (
                ('MARTHA', 'MARHTA', 0.05555555, 0.03888888),
                ('DWAYNE', 'DUANE', 0.17777777, 0.16),
                ('DIXON', 'DICKSONX', 0.23333333, 0.18666666)):
            close(dist_jaro_winkler(src, tar, mode='jaro'), jaro)
            close(dist_jaro_winkler(src, tar, mode='winkler'), winkler)

        # keyword values expected to be rejected with ValueError
        for rejected in (dict(boost_threshold=2),
                         dict(boost_threshold=-1),
                         dict(scaling_factor=0.3),
                         dict(scaling_factor=-1)):
            self.assertRaises(ValueError, dist_jaro_winkler, 'abcd', 'dcba',
                              **rejected)

        close(dist_jaro_winkler('ABCD', 'EFGH'), 1.0)
1279
1280
1281
class LcsseqTestCases(unittest.TestCase):
    """Test the longest common subsequence (LCSseq) functions.

    abydos.distance.lcsseq, .sim_lcsseq, & .dist_lcsseq
    """

    def test_lcsseq(self):
        """Test abydos.distance.lcsseq against known subsequences."""
        expectations = (
            ('', '', ''),
            ('A', '', ''),
            ('', 'A', ''),
            ('A', 'A', 'A'),
            ('ABCD', '', ''),
            ('', 'ABCD', ''),
            ('ABCD', 'ABCD', 'ABCD'),
            ('ABCD', 'BC', 'BC'),
            ('ABCD', 'AD', 'AD'),
            ('ABCD', 'AC', 'AC'),
            ('AB', 'CD', ''),
            ('ABC', 'BCD', 'BC'),

            ('DIXON', 'DICKSONX', 'DION'),

            # https://en.wikipedia.org/wiki/Longest_common_subsequence_problem
            ('AGCAT', 'GAC', 'AC'),
            ('XMJYAUZ', 'MZJAWXU', 'MJAU'),

            # https://github.com/jwmerrill/factor/blob/master/basis/lcs/lcs-tests.factor
            ('hell', 'hello', 'hell'),
            ('hello', 'hell', 'hell'),
            ('ell', 'hell', 'ell'),
            ('hell', 'ell', 'ell'),
            ('faxbcd', 'abdef', 'abd'),

            # http://www.unesco.org/culture/languages-atlas/assets/_core/php/qcubed_unit_tests.php
            ('hello world', 'world war 2', 'world'),
            ('foo bar', 'bar foo', 'foo'),
            ('aaa', 'aa', 'aa'),
            ('cc', 'bbbbcccccc', 'cc'),
            ('ccc', 'bcbb', 'c'),
        )
        for src, tar, expected in expectations:
            self.assertEqual(lcsseq(src, tar), expected)

    def test_sim_lcsseq(self):
        """Test abydos.distance.sim_lcsseq (normalized similarity)."""
        # Degenerate inputs must hit 0 or 1 exactly.
        exact = (
            ('', '', 1),
            ('A', '', 0),
            ('', 'A', 0),
            ('A', 'A', 1),
            ('ABCD', '', 0),
            ('', 'ABCD', 0),
            ('ABCD', 'ABCD', 1),
        )
        for src, tar, expected in exact:
            self.assertEqual(sim_lcsseq(src, tar), expected)

        # Fractional results are compared approximately.
        approximate = (
            ('ABCD', 'BC', 2/4),
            ('ABCD', 'AD', 2/4),
            ('ABCD', 'AC', 2/4),
            ('AB', 'CD', 0),
            ('ABC', 'BCD', 2/3),

            ('DIXON', 'DICKSONX', 4/8),

            # https://en.wikipedia.org/wiki/Longest_common_subsequence_problem
            ('AGCAT', 'GAC', 2/5),
            ('XMJYAUZ', 'MZJAWXU', 4/7),

            # https://github.com/jwmerrill/factor/blob/master/basis/lcs/lcs-tests.factor
            ('hell', 'hello', 4/5),
            ('hello', 'hell', 4/5),
            ('ell', 'hell', 3/4),
            ('hell', 'ell', 3/4),
            ('faxbcd', 'abdef', 3/6),

            # http://www.unesco.org/culture/languages-atlas/assets/_core/php/qcubed_unit_tests.php
            ('hello world', 'world war 2', 5/11),
            ('foo bar', 'bar foo', 3/7),
            ('aaa', 'aa', 2/3),
            ('cc', 'bbbbcccccc', 2/10),
            ('ccc', 'bcbb', 1/4),
        )
        for src, tar, expected in approximate:
            self.assertAlmostEqual(sim_lcsseq(src, tar), expected)

    def test_dist_lcsseq(self):
        """Test abydos.distance.dist_lcsseq (normalized distance)."""
        # Degenerate inputs must hit 0 or 1 exactly.
        exact = (
            ('', '', 0),
            ('A', '', 1),
            ('', 'A', 1),
            ('A', 'A', 0),
            ('ABCD', '', 1),
            ('', 'ABCD', 1),
            ('ABCD', 'ABCD', 0),
        )
        for src, tar, expected in exact:
            self.assertEqual(dist_lcsseq(src, tar), expected)

        # Fractional results are compared approximately.
        approximate = (
            ('ABCD', 'BC', 2/4),
            ('ABCD', 'AD', 2/4),
            ('ABCD', 'AC', 2/4),
            ('AB', 'CD', 1),
            ('ABC', 'BCD', 1/3),

            ('DIXON', 'DICKSONX', 4/8),

            # https://en.wikipedia.org/wiki/Longest_common_subsequence_problem
            ('AGCAT', 'GAC', 3/5),
            ('XMJYAUZ', 'MZJAWXU', 3/7),

            # https://github.com/jwmerrill/factor/blob/master/basis/lcs/lcs-tests.factor
            ('hell', 'hello', 1/5),
            ('hello', 'hell', 1/5),
            ('ell', 'hell', 1/4),
            ('hell', 'ell', 1/4),
            ('faxbcd', 'abdef', 3/6),

            # http://www.unesco.org/culture/languages-atlas/assets/_core/php/qcubed_unit_tests.php
            ('hello world', 'world war 2', 6/11),
            ('foo bar', 'bar foo', 4/7),
            ('aaa', 'aa', 1/3),
            ('cc', 'bbbbcccccc', 8/10),
            ('ccc', 'bcbb', 3/4),
        )
        for src, tar, expected in approximate:
            self.assertAlmostEqual(dist_lcsseq(src, tar), expected)
1391
1392
1393
class LcsstrTestCases(unittest.TestCase):
    """Test the longest common substring (LCSstr) functions.

    abydos.distance.lcsstr, .sim_lcsstr, & .dist_lcsstr
    """

    def test_lcsstr(self):
        """Test abydos.distance.lcsstr against known substrings."""
        # Long DNA fixture, assembled from adjacent literals.
        dna_src = ('TAAGGTCGGCGCGCACGCTGGCGAGTATGGTGCGGAGGCCCTGGA'
                   'GAGGTGAGGCTCCCTCCCCTGCTCCGACCCGGGCTCCTCGCCCGCCCGGACCCAC')
        dna_tar = ('AAGCGCCGCGCAGTCTGGG'
                   'CTCCGCACACTTCTGGTCCAGTCCGACTGAGAAGGAACC'
                   'ACCATGGTGCTGTCTCCCGCTGACAAGACCAACATCAAG'
                   'ACTGCCTGGGAAAAGATCGGCAGCCACGGTGGCGAGTATGGCGCCGAGGCCGT')

        expectations = (
            ('', '', ''),
            ('A', '', ''),
            ('', 'A', ''),
            ('A', 'A', 'A'),
            ('ABCD', '', ''),
            ('', 'ABCD', ''),
            ('ABCD', 'ABCD', 'ABCD'),
            ('ABCD', 'BC', 'BC'),
            ('ABCD', 'AD', 'A'),
            ('ABCD', 'AC', 'A'),
            ('AB', 'CD', ''),
            ('ABC', 'BCD', 'BC'),

            ('DIXON', 'DICKSONX', 'DI'),

            # https://en.wikipedia.org/wiki/Longest_common_subsequence_problem
            ('AGCAT', 'GAC', 'A'),
            ('XMJYAUZ', 'MZJAWXU', 'X'),

            # https://github.com/jwmerrill/factor/blob/master/basis/lcs/lcs-tests.factor
            ('hell', 'hello', 'hell'),
            ('hello', 'hell', 'hell'),
            ('ell', 'hell', 'ell'),
            ('hell', 'ell', 'ell'),
            ('faxbcd', 'abdef', 'f'),

            # http://www.unesco.org/culture/languages-atlas/assets/_core/php/qcubed_unit_tests.php
            ('hello world', 'world war 2', 'world'),
            ('foo bar', 'bar foo', 'foo'),
            ('aaa', 'aa', 'aa'),
            ('cc', 'bbbbcccccc', 'cc'),
            ('ccc', 'bcbb', 'c'),

            # http://www.maplesoft.com/support/help/Maple/view.aspx?path=StringTools/LongestCommonSubString
            ('abax', 'bax', 'bax'),
            ('tsaxbaxyz', 'axcaxy', 'axy'),
            ('abcde', 'uvabxycde', 'cde'),
            ('abc', 'xyz', ''),
            (dna_src, dna_tar, 'TGGCGAGTATGG'),
        )
        for src, tar, expected in expectations:
            self.assertEqual(lcsstr(src, tar), expected)

    def test_sim_lcsstr(self):
        """Test abydos.distance.sim_lcsstr (normalized similarity)."""
        # Degenerate inputs must hit 0 or 1 exactly.
        exact = (
            ('', '', 1),
            ('A', '', 0),
            ('', 'A', 0),
            ('A', 'A', 1),
            ('ABCD', '', 0),
            ('', 'ABCD', 0),
            ('ABCD', 'ABCD', 1),
        )
        for src, tar, expected in exact:
            self.assertEqual(sim_lcsstr(src, tar), expected)

        # Fractional results are compared approximately.
        approximate = (
            ('ABCD', 'BC', 2/4),
            ('ABCD', 'AD', 1/4),
            ('ABCD', 'AC', 1/4),
            ('AB', 'CD', 0),
            ('ABC', 'BCD', 2/3),

            ('DIXON', 'DICKSONX', 2/8),

            # https://en.wikipedia.org/wiki/Longest_common_subsequence_problem
            ('AGCAT', 'GAC', 1/5),
            ('XMJYAUZ', 'MZJAWXU', 1/7),

            # https://github.com/jwmerrill/factor/blob/master/basis/lcs/lcs-tests.factor
            ('hell', 'hello', 4/5),
            ('hello', 'hell', 4/5),
            ('ell', 'hell', 3/4),
            ('hell', 'ell', 3/4),
            ('faxbcd', 'abdef', 1/6),

            # http://www.unesco.org/culture/languages-atlas/assets/_core/php/qcubed_unit_tests.php
            ('hello world', 'world war 2', 5/11),
            ('foo bar', 'bar foo', 3/7),
            ('aaa', 'aa', 2/3),
            ('cc', 'bbbbcccccc', 2/10),
            ('ccc', 'bcbb', 1/4),
        )
        for src, tar, expected in approximate:
            self.assertAlmostEqual(sim_lcsstr(src, tar), expected)

    def test_dist_lcsstr(self):
        """Test abydos.distance.dist_lcsstr (normalized distance)."""
        # Degenerate inputs must hit 0 or 1 exactly.
        exact = (
            ('', '', 0),
            ('A', '', 1),
            ('', 'A', 1),
            ('A', 'A', 0),
            ('ABCD', '', 1),
            ('', 'ABCD', 1),
            ('ABCD', 'ABCD', 0),
        )
        for src, tar, expected in exact:
            self.assertEqual(dist_lcsstr(src, tar), expected)

        # Fractional results are compared approximately.
        approximate = (
            ('ABCD', 'BC', 2/4),
            ('ABCD', 'AD', 3/4),
            ('ABCD', 'AC', 3/4),
            ('AB', 'CD', 1),
            ('ABC', 'BCD', 1/3),

            ('DIXON', 'DICKSONX', 6/8),

            # https://en.wikipedia.org/wiki/Longest_common_subsequence_problem
            ('AGCAT', 'GAC', 4/5),
            ('XMJYAUZ', 'MZJAWXU', 6/7),

            # https://github.com/jwmerrill/factor/blob/master/basis/lcs/lcs-tests.factor
            ('hell', 'hello', 1/5),
            ('hello', 'hell', 1/5),
            ('ell', 'hell', 1/4),
            ('hell', 'ell', 1/4),
            ('faxbcd', 'abdef', 5/6),

            # http://www.unesco.org/culture/languages-atlas/assets/_core/php/qcubed_unit_tests.php
            ('hello world', 'world war 2', 6/11),
            ('foo bar', 'bar foo', 4/7),
            ('aaa', 'aa', 1/3),
            ('cc', 'bbbbcccccc', 8/10),
            ('ccc', 'bcbb', 3/4),
        )
        for src, tar, expected in approximate:
            self.assertAlmostEqual(dist_lcsstr(src, tar), expected)
1513
1514
1515
class RatcliffObershelpTestCases(unittest.TestCase):
    """Test Ratcliff-Obershelp functions.

    abydos.distance.sim_ratcliff_obershelp, &
    abydos.distance.dist_ratcliff_obershelp
    """

    def _assert_matches_difflib(self, word1, word2):
        """Assert sim_ratcliff_obershelp agrees with difflib's ratio."""
        reference = SequenceMatcher(None, word1, word2).ratio()
        self.assertAlmostEqual(sim_ratcliff_obershelp(word1, word2),
                               reference)

    def test_sim_ratcliff_obershelp(self):
        """Test abydos.distance.sim_ratcliff_obershelp."""
        # https://github.com/rockymadden/stringmetric/blob/master/core/src/test/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetricSpec.scala
        exact = (
            ('', '', 1),
            ('abc', '', 0),
            ('', 'xyz', 0),
            ('abc', 'abc', 1),
            ('123', '123', 1),
            ('abc', 'xyz', 0),
            ('123', '456', 0),
        )
        for src, tar, expected in exact:
            self.assertEqual(sim_ratcliff_obershelp(src, tar), expected)

        # Each pair is checked in both argument orders.
        approximate = (
            ('aleksander', 'alexandre', 0.7368421052631579),
            ('alexandre', 'aleksander', 0.7368421052631579),
            ('pennsylvania', 'pencilvaneya', 0.6666666666666666),
            ('pencilvaneya', 'pennsylvania', 0.6666666666666666),
            ('abcefglmn', 'abefglmo', 0.8235294117647058),
            ('abefglmo', 'abcefglmn', 0.8235294117647058),
        )
        for src, tar, expected in approximate:
            self.assertAlmostEqual(sim_ratcliff_obershelp(src, tar),
                                   expected)

        # Compare columns 0 and 4 of every data row against difflib.
        with open(TESTDIR+'/corpora/variantNames.csv') as cav_testset:
            next(cav_testset)  # skip the first line
            for row in cav_testset:
                fields = row.strip().split(',')
                self._assert_matches_difflib(fields[0], fields[4])

        # Keep only upper-case ASCII letters and the separating comma.
        permitted = tuple('ABCDEFGHIJKLMNOPQRSTUVWXYZ,')
        with open(TESTDIR+'/corpora/wikipediaCommonMisspellings.csv') as missp:
            next(missp)  # skip the first line
            for row in missp:
                cleaned = ''.join(char for char in row.strip().upper()
                                  if char in permitted)
                word1, word2 = cleaned.split(',')
                self._assert_matches_difflib(word1, word2)

    def test_dist_ratcliff_obershelp(self):
        """Test abydos.distance.dist_ratcliff_obershelp."""
        # https://github.com/rockymadden/stringmetric/blob/master/core/src/test/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetricSpec.scala
        exact = (
            ('', '', 0),
            ('abc', '', 1),
            ('', 'xyz', 1),
            ('abc', 'abc', 0),
            ('123', '123', 0),
            ('abc', 'xyz', 1),
            ('123', '456', 1),
        )
        for src, tar, expected in exact:
            self.assertEqual(dist_ratcliff_obershelp(src, tar), expected)

        # Each pair is checked in both argument orders.
        approximate = (
            ('aleksander', 'alexandre', 0.2631578947368421),
            ('alexandre', 'aleksander', 0.2631578947368421),
            ('pennsylvania', 'pencilvaneya', 0.3333333333333333),
            ('pencilvaneya', 'pennsylvania', 0.3333333333333333),
            ('abcefglmn', 'abefglmo', 0.1764705882352941),
            ('abefglmo', 'abcefglmn', 0.1764705882352941),
        )
        for src, tar, expected in approximate:
            self.assertAlmostEqual(dist_ratcliff_obershelp(src, tar),
                                   expected)
1598
1599
1600
class MraTestCases(unittest.TestCase):
    """Test the Match Rating Approach (MRA) comparison functions.

    abydos.distance.mra_compare, .sim_mra & .dist_mra
    """

    def test_mra_compare(self):
        """Test abydos.distance.mra_compare (raw integer score)."""
        expectations = (
            ('', '', 6),
            ('a', 'a', 6),
            ('abcdefg', 'abcdefg', 6),
            ('abcdefg', '', 0),
            ('', 'abcdefg', 0),

            # https://en.wikipedia.org/wiki/Match_rating_approach
            ('Byrne', 'Boern', 5),
            ('Smith', 'Smyth', 5),
            ('Catherine', 'Kathryn', 4),

            ('ab', 'abcdefgh', 0),
            ('ab', 'ac', 5),
            ('abcdefik', 'abcdefgh', 3),
            ('xyz', 'abc', 0),
        )
        for src, tar, expected in expectations:
            self.assertEqual(mra_compare(src, tar), expected)

    def test_sim_mra(self):
        """Test abydos.distance.sim_mra (score expressed in sixths)."""
        expectations = (
            ('', '', 1),
            ('a', 'a', 1),
            ('abcdefg', 'abcdefg', 1),
            ('abcdefg', '', 0),
            ('', 'abcdefg', 0),

            # https://en.wikipedia.org/wiki/Match_rating_approach
            ('Byrne', 'Boern', 5/6),
            ('Smith', 'Smyth', 5/6),
            ('Catherine', 'Kathryn', 4/6),

            ('ab', 'abcdefgh', 0),
            ('ab', 'ac', 5/6),
            ('abcdefik', 'abcdefgh', 3/6),
            ('xyz', 'abc', 0),
        )
        for src, tar, expected in expectations:
            self.assertEqual(sim_mra(src, tar), expected)

    def test_dist_mra(self):
        """Test abydos.distance.dist_mra."""
        exactly = self.assertEqual
        roughly = self.assertAlmostEqual
        # Each row carries its own comparison so the checks keep their order.
        checks = (
            (exactly, '', '', 0),
            (exactly, 'a', 'a', 0),
            (exactly, 'abcdefg', 'abcdefg', 0),
            (exactly, 'abcdefg', '', 1),
            (exactly, '', 'abcdefg', 1),

            # https://en.wikipedia.org/wiki/Match_rating_approach
            (roughly, 'Byrne', 'Boern', 1/6),
            (roughly, 'Smith', 'Smyth', 1/6),
            (roughly, 'Catherine', 'Kathryn', 2/6),

            (exactly, 'ab', 'abcdefgh', 1),
            (roughly, 'ab', 'ac', 1/6),
            (roughly, 'abcdefik', 'abcdefgh', 3/6),
            (exactly, 'xyz', 'abc', 1),
        )
        for check, src, tar, expected in checks:
            check(dist_mra(src, tar), expected)
1659
1660
1661
class CompressionTestCases(unittest.TestCase):
    """Test compression distance functions.

    abydos.distance.dist_compression & .sim_compression
    """

    # Probability dictionary for the 'arith' compressor, trained once on the
    # space-joined NIALL name list and shared by all tests in this class.
    arith_dict = ac_train(' '.join(NIALL))

    def _all_modes(self):
        """Return the trailing compressor arguments used by the basic tests.

        Each entry is the tuple of extra positional arguments passed after
        (src, tar): nothing (default compressor), a compressor name, or the
        'arith' compressor together with the trained dictionary.
        """
        return ((), ('bzip2',), ('zlib',), ('arith',),
                ('arith', self.arith_dict), ('rle',), ('bwtrle',))

    # Same as _all_modes() but without the trained-dictionary variant.
    _untrained_modes = ((), ('bzip2',), ('zlib',), ('arith',), ('rle',),
                        ('bwtrle',))

    def test_dist_compression(self):
        """Test abydos.distance.dist_compression."""
        # Two empty strings are at distance 0 under every compressor.
        for extra in self._all_modes():
            self.assertEqual(dist_compression('', '', *extra), 0)

        # Any non-identical pair must be at a strictly positive distance.
        for extra in self._all_modes():
            self.assertGreater(dist_compression('a', '', *extra), 0)

        for extra in self._untrained_modes:
            self.assertGreater(dist_compression('abcdefg', 'fg', *extra), 0)

    def test_dist_compression_arith(self):
        """Test abydos.distance.dist_compression (arithmetic compression)."""
        trained = self.arith_dict
        # Each pair is checked in both argument orders.
        cases = (
            (('Niall', 'Neil', 'arith', trained), 0.608695652173913),
            (('Neil', 'Niall', 'arith', trained), 0.608695652173913),
            (('Niall', 'Neil', 'arith'), 0.6875),
            (('Neil', 'Niall', 'arith'), 0.6875),
            (('Njáll', 'Njall', 'arith', trained), 0.714285714285714),
            (('Njall', 'Njáll', 'arith', trained), 0.714285714285714),
            (('Njáll', 'Njall', 'arith'), 0.75),
            (('Njall', 'Njáll', 'arith'), 0.75),
        )
        for args, expected in cases:
            self.assertAlmostEqual(dist_compression(*args), expected)

    def test_dist_compression_rle(self):
        """Test abydos.distance.dist_compression (RLE & BWT+RLE)."""
        cases = (
            (('abc', 'abc', 'rle'), 0),
            (('abc', 'def', 'rle'), 1),

            (('abc', 'abc', 'bwtrle'), 0),
            (('abc', 'def', 'bwtrle'), 0.75),

            (('aaa', 'bbaaa', 'rle'), 0.5),
            (('abb', 'bbba', 'rle'), 1/3),
            (('banana', 'banane', 'bwtrle'), 0.57142857142),
            (('bananas', 'bananen', 'bwtrle'), 0.5),
        )
        for args, expected in cases:
            self.assertAlmostEqual(dist_compression(*args), expected)

    def test_sim_compression(self):
        """Test abydos.distance.sim_compression."""
        # Two empty strings have similarity 1 under every compressor.
        for extra in self._all_modes():
            self.assertEqual(sim_compression('', '', *extra), 1)

        # Any non-identical pair must score strictly below 1.
        for extra in self._all_modes():
            self.assertLess(sim_compression('a', '', *extra), 1)

        for extra in self._untrained_modes:
            self.assertLess(sim_compression('abcdefg', 'fg', *extra), 1)

    def test_sim_compression_arith(self):
        """Test abydos.distance.sim_compression (arithmetic compression)."""
        trained = self.arith_dict
        # Each pair is checked in both argument orders.
        cases = (
            (('Niall', 'Neil', 'arith', trained), 0.3913043478260869),
            (('Neil', 'Niall', 'arith', trained), 0.3913043478260869),
            (('Niall', 'Neil', 'arith'), 0.3125),
            (('Neil', 'Niall', 'arith'), 0.3125),
            (('Njáll', 'Njall', 'arith', trained), 0.285714285714285),
            (('Njall', 'Njáll', 'arith', trained), 0.285714285714285),
            (('Njáll', 'Njall', 'arith'), 0.25),
            (('Njall', 'Njáll', 'arith'), 0.25),
        )
        for args, expected in cases:
            self.assertAlmostEqual(sim_compression(*args), expected)

    def test_sim_compression_rle(self):
        """Test abydos.distance.sim_compression (RLE & BWT+RLE)."""
        cases = (
            (('abc', 'abc', 'rle'), 1),
            (('abc', 'def', 'rle'), 0),

            (('abc', 'abc', 'bwtrle'), 1),
            (('abc', 'def', 'bwtrle'), 0.25),

            (('aaa', 'bbaaa', 'rle'), 0.5),
            (('abb', 'bbba', 'rle'), 2/3),
            (('banana', 'banane', 'bwtrle'), 0.42857142857),
            (('bananas', 'bananen', 'bwtrle'), 0.5),
        )
        for args, expected in cases:
            self.assertAlmostEqual(sim_compression(*args), expected)

    def test_lzma(self):
        """Test LZMA-related sim/dist functions.

        When an lzma module can be located, the basic distance/similarity
        properties are checked with it. Afterwards the module is hidden from
        sys.modules to check that dist_compression raises ValueError, and is
        then put back so that later tests see an unchanged interpreter state.
        """
        if pkgutil.find_loader('lzma'):
            self.assertEqual(dist_compression('', '', 'lzma'), 0)
            self.assertGreater(dist_compression('a', '', 'lzma'), 0)
            self.assertGreater(dist_compression('abcdefg', 'fg', 'lzma'), 0)
            self.assertEqual(sim_compression('', '', 'lzma'), 1)
            self.assertLess(sim_compression('a', '', 'lzma'), 1)
            self.assertLess(sim_compression('abcdefg', 'fg', 'lzma'), 1)

        # NOTE(review): presumably dist_compression decides whether lzma is
        # usable by looking in sys.modules -- confirm against the library.
        # pop() (rather than del) tolerates lzma never having been imported.
        hidden_lzma = sys.modules.pop('lzma', None)
        try:
            self.assertRaises(ValueError, dist_compression, 'a', '', 'lzma')
        finally:
            # Restore the module so this test does not leak global state.
            if hidden_lzma is not None:
                sys.modules['lzma'] = hidden_lzma
1809
1810
1811
class MongeElkanTestCases(unittest.TestCase):
    """Test the Monge-Elkan similarity & distance functions.

    abydos.distance.sim_monge_elkan & .dist_monge_elkan
    """

    def test_sim_monge_elkan(self):
        """Test abydos.distance.sim_monge_elkan."""
        for src, tar, expected in (('', '', 1), ('', 'a', 0), ('a', 'a', 1)):
            self.assertEqual(sim_monge_elkan(src, tar), expected)

        # Default (asymmetric) comparison against 'Niall'.
        asymmetric = (
            ('Neal', 3/4),
            ('Njall', 5/6),
            ('Niel', 3/4),
            ('Nigel', 3/4),
        )
        for tar, expected in asymmetric:
            self.assertEqual(sim_monge_elkan('Niall', tar), expected)

        # Symmetric comparison against 'Niall'.
        symmetric = (
            ('Neal', 31/40),
            ('Njall', 5/6),
            ('Niel', 31/40),
        )
        for tar, expected in symmetric:
            self.assertEqual(sim_monge_elkan('Niall', tar, symmetric=True),
                             expected)
        self.assertAlmostEqual(
            sim_monge_elkan('Niall', 'Nigel', symmetric=True), 17/24)

    def test_dist_monge_elkan(self):
        """Test abydos.distance.dist_monge_elkan."""
        for src, tar, expected in (('', '', 0), ('', 'a', 1)):
            self.assertEqual(dist_monge_elkan(src, tar), expected)

        exactly = self.assertEqual
        roughly = self.assertAlmostEqual
        # Default (asymmetric) comparison; each row names its own check.
        asymmetric = (
            (exactly, 'Neal', 1/4),
            (roughly, 'Njall', 1/6),
            (exactly, 'Niel', 1/4),
            (exactly, 'Nigel', 1/4),
        )
        for check, tar, expected in asymmetric:
            check(dist_monge_elkan('Niall', tar), expected)

        # Symmetric comparison against 'Niall'.
        symmetric = (
            ('Neal', 9/40),
            ('Njall', 1/6),
            ('Niel', 9/40),
            ('Nigel', 7/24),
        )
        for tar, expected in symmetric:
            self.assertAlmostEqual(
                dist_monge_elkan('Niall', tar, symmetric=True), expected)
1855
1856
1857
class IdentityTestCases(unittest.TestCase):
    """Exercise the identity similarity and distance functions.

    abydos.distance.sim_ident & .dist_ident
    """

    def test_sim_ident(self):
        """Test abydos.distance.sim_ident."""
        # (src, tar, expected similarity)
        expectations = (
            ('', '', 1),
            ('', 'a', 0),
            ('a', '', 0),
            ('a', 'a', 1),
            ('abcd', 'abcd', 1),
            ('abcd', 'dcba', 0),
            ('abc', 'cba', 0),
        )
        for src, tar, expected in expectations:
            self.assertEqual(sim_ident(src, tar), expected)

    def test_dist_ident(self):
        """Test abydos.distance.dist_ident."""
        # (src, tar, expected distance)
        expectations = (
            ('', '', 0),
            ('', 'a', 1),
            ('a', '', 1),
            ('a', 'a', 0),
            ('abcd', 'abcd', 0),
            ('abcd', 'dcba', 1),
            ('abc', 'cba', 1),
        )
        for src, tar, expected in expectations:
            self.assertEqual(dist_ident(src, tar), expected)
1882
1883
1884
def _sim_wikipedia(src, tar):
    """Score a pair of DNA bases using the Wikipedia example matrix.

    The scores are copied from:
    https://en.wikipedia.org/wiki/Needleman%E2%80%93Wunsch_algorithm
    """
    # Scores for identical bases.
    base_scores = {
        ('A', 'A'): 10,
        ('G', 'G'): 7,
        ('C', 'C'): 9,
        ('T', 'T'): 8,
    }
    # Scores for differing bases. Each unordered pair is listed once and
    # sim_matrix is invoked with symmetric=True.
    base_scores.update({
        ('A', 'G'): -1,
        ('A', 'C'): -3,
        ('A', 'T'): -4,
        ('G', 'C'): -5,
        ('G', 'T'): -3,
        ('C', 'T'): 0,
    })
    return sim_matrix(src, tar, base_scores, symmetric=True, alphabet='CGAT')
1894
1895
1896
def _sim_nw(src, tar):
    """Return 1 if src equals tar, otherwise -1.

    The comparison uses equality (``==``) rather than identity (``is``):
    two equal strings are not guaranteed to be the same object, so an
    identity test only gives the intended answer when the interpreter
    happens to share one object for both operands.

    :param src: the first value to compare
    :param tar: the second value to compare
    :returns: 1.0 when the values are equal, -1.0 otherwise
    """
    # float(True) is 1.0 and float(False) is 0.0, so this maps a match to
    # 1.0 and a mismatch to -1.0.
    return 2*float(src == tar)-1
1899
1900
1901
class MatrixSimTestCases(unittest.TestCase):
    """Exercise the matrix similarity function.

    abydos.distance.sim_matrix
    """

    def test_sim_matrix(self):
        """Test abydos.distance.sim_matrix."""
        # Calls that rely on sim_matrix's default arguments.
        default_expectations = (
            ('', '', 1),
            ('', 'a', 0),
            ('a', '', 0),
            ('a', 'a', 1),
            ('abcd', 'abcd', 1),
            ('abcd', 'dcba', 0),
            ('abc', 'cba', 0),
        )
        for src, tar, expected in default_expectations:
            self.assertEqual(sim_matrix(src, tar), expected)

        # https://en.wikipedia.org/wiki/Needleman–Wunsch_algorithm
        wikipedia_expectations = (
            ('A', 'C', -3),
            ('G', 'G', 7),
            ('A', 'A', 10),
            ('T', 'A', -4),
            ('T', 'C', 0),
            ('A', 'G', -1),
            ('C', 'T', 0),
        )
        for src, tar, expected in wikipedia_expectations:
            self.assertEqual(_sim_wikipedia(src, tar), expected)

        # Each pair contains a 'c', which is not in the alphabet 'ab';
        # a ValueError is expected.
        for src, tar in (('abc', 'cba'), ('abc', 'ba'), ('ab', 'cba')):
            with self.assertRaises(ValueError):
                sim_matrix(src, tar, alphabet='ab')
1929
1930
1931
class NeedlemanWunschTestCases(unittest.TestCase):
    """Exercise the Needleman-Wunsch function.

    abydos.distance.needleman_wunsch
    """

    def test_needleman_wunsch(self):
        """Test abydos.distance.needleman_wunsch."""
        self.assertEqual(needleman_wunsch('', ''), 0)

        # (src, tar, third positional argument, scoring function, expected)
        expectations = (
            # https://en.wikipedia.org/wiki/Needleman–Wunsch_algorithm
            ('GATTACA', 'GCATGCU', 1, _sim_nw, 0),
            ('AGACTAGTTAC', 'CGAGACGT', 5, _sim_wikipedia, 16),
            # checked against http://ds9a.nl/nwunsch/
            # (mismatch=1, gap=5, skew=5)
            ('CGATATCAG', 'TGACGSTGC', 5, _sim_nw, -5),
            ('AGACTAGTTAC', 'TGACGSTGC', 5, _sim_nw, -7),
            ('AGACTAGTTAC', 'CGAGACGT', 5, _sim_nw, -15),
        )
        for src, tar, cost, scorer, expected in expectations:
            self.assertEqual(needleman_wunsch(src, tar, cost, scorer),
                             expected)

    def test_needleman_wunsch_nialls(self):
        """Test abydos.distance.needleman_wunsch (Nialls set)."""
        # checked against http://ds9a.nl/nwunsch/ (mismatch=1, gap=2, skew=2)
        nw_vals = (5, 0, -2, 3, 1, 1, -2, -2, -1, -3, -3, -5, -3, -7, -7, -19)
        # Every entry of NIALL is aligned against the first entry.
        for position, name in enumerate(NIALL):
            self.assertEqual(needleman_wunsch(NIALL[0], name, 2, _sim_nw),
                             nw_vals[position])
1962
1963
1964
class SmithWatermanTestCases(unittest.TestCase):
    """Exercise the Smith-Waterman function.

    abydos.distance.smith_waterman
    """

    def test_smith_waterman(self):
        """Test abydos.distance.smith_waterman."""
        self.assertEqual(smith_waterman('', ''), 0)

        # (src, tar, third positional argument, scoring function, expected)
        expectations = (
            # https://en.wikipedia.org/wiki/Needleman–Wunsch_algorithm
            ('GATTACA', 'GCATGCU', 1, _sim_nw, 0),
            ('AGACTAGTTAC', 'CGAGACGT', 5, _sim_wikipedia, 26),
            # Same string pairs as the Needleman-Wunsch tests.
            ('CGATATCAG', 'TGACGSTGC', 5, _sim_nw, 0),
            ('AGACTAGTTAC', 'TGACGSTGC', 5, _sim_nw, 1),
            ('AGACTAGTTAC', 'CGAGACGT', 5, _sim_nw, 0),
        )
        for src, tar, cost, scorer, expected in expectations:
            self.assertEqual(smith_waterman(src, tar, cost, scorer),
                             expected)

    def test_smith_waterman_nialls(self):
        """Test abydos.distance.smith_waterman (Nialls set)."""
        sw_vals = (5, 1, 1, 3, 2, 1, 1, 0, 0, 1, 1, 2, 2, 1, 0, 0)
        # Every entry of NIALL is aligned against the first entry.
        for position, name in enumerate(NIALL):
            self.assertEqual(smith_waterman(NIALL[0], name, 2, _sim_nw),
                             sw_vals[position])
1993
1994
1995
class GotohTestCases(unittest.TestCase):
    """Exercise the Gotoh function.

    abydos.distance.gotoh
    """

    def test_gotoh(self):
        """Test abydos.distance.gotoh."""
        self.assertEqual(gotoh('', ''), 0)

        # Each row: (src, tar, third argument, fourth argument for the exact
        # check, smaller fourth argument for the >= check, scoring function,
        # expected exact score).
        # NOTE(review): the third/fourth arguments are presumably the gap
        # open and gap extension costs -- confirm against gotoh's signature.
        expectations = (
            # https://en.wikipedia.org/wiki/Needleman–Wunsch_algorithm
            ('GATTACA', 'GCATGCU', 1, 1, 0.5, _sim_nw, 0),
            ('AGACTAGTTAC', 'CGAGACGT', 5, 5, 2, _sim_wikipedia, 16),
            # checked against http://ds9a.nl/nwunsch/
            # (mismatch=1, gap=5, skew=5)
            ('CGATATCAG', 'TGACGSTGC', 5, 5, 2, _sim_nw, -5),
            ('AGACTAGTTAC', 'TGACGSTGC', 5, 5, 2, _sim_nw, -7),
            ('AGACTAGTTAC', 'CGAGACGT', 5, 5, 2, _sim_nw, -15),
        )
        for src, tar, cost, ext, low_ext, scorer, expected in expectations:
            self.assertEqual(gotoh(src, tar, cost, ext, scorer), expected)
            # With the smaller fourth argument the score must be at least
            # the plain Needleman-Wunsch score for the same inputs.
            self.assertGreaterEqual(
                gotoh(src, tar, cost, low_ext, scorer),
                needleman_wunsch(src, tar, cost, scorer))

    def test_gotoh_nialls(self):
        """Test abydos.distance.gotoh (Nialls set)."""
        # checked against http://ds9a.nl/nwunsch/ (mismatch=1, gap=2, skew=2)
        nw_vals = (5, 0, -2, 3, 1, 1, -2, -2, -1, -3, -3, -5, -3, -7, -7, -19)
        for position, name in enumerate(NIALL):
            self.assertEqual(gotoh(NIALL[0], name, 2, 2, _sim_nw),
                             nw_vals[position])

        nw_vals2 = (5, 0, -2, 3, 1, 1, -2, -2, -1, -2, -3, -3, -2, -6, -6, -8)
        for position, name in enumerate(NIALL):
            self.assertEqual(gotoh(NIALL[0], name, 2, 1, _sim_nw),
                             nw_vals2[position])
            self.assertGreaterEqual(
                gotoh(NIALL[0], name, 2, 0.5, _sim_nw),
                needleman_wunsch(NIALL[0], name, 2, _sim_nw))
2047
2048
2049
class LengthTestCases(unittest.TestCase):
    """Test length similarity functions.

    abydos.distance.sim_length & .dist_length
    """

    def test_sim_length(self):
        """Test abydos.distance.sim_length.

        The method is named after the function it exercises (it was
        previously called test_sim_ident, duplicating the name used in
        IdentityTestCases).
        """
        # Empty strings and equal-length strings.
        self.assertEqual(sim_length('', ''), 1)
        self.assertEqual(sim_length('', 'a'), 0)
        self.assertEqual(sim_length('a', ''), 0)
        self.assertEqual(sim_length('a', 'a'), 1)
        self.assertEqual(sim_length('abcd', 'abcd'), 1)
        self.assertEqual(sim_length('abcd', 'dcba'), 1)
        self.assertEqual(sim_length('abc', 'cba'), 1)
        # Strings of different lengths, checked in both argument orders.
        self.assertEqual(sim_length('abc', 'dcba'), 0.75)
        self.assertEqual(sim_length('abcd', 'cba'), 0.75)
        self.assertEqual(sim_length('ab', 'dcba'), 0.5)
        self.assertEqual(sim_length('abcd', 'ba'), 0.5)

    def test_dist_length(self):
        """Test abydos.distance.dist_length.

        The method is named after the function it exercises (it was
        previously called test_dist_ident, duplicating the name used in
        IdentityTestCases).
        """
        # Empty strings and equal-length strings.
        self.assertEqual(dist_length('', ''), 0)
        self.assertEqual(dist_length('', 'a'), 1)
        self.assertEqual(dist_length('a', ''), 1)
        self.assertEqual(dist_length('a', 'a'), 0)
        self.assertEqual(dist_length('abcd', 'abcd'), 0)
        self.assertEqual(dist_length('abcd', 'dcba'), 0)
        self.assertEqual(dist_length('abc', 'cba'), 0)
        # Strings of different lengths, checked in both argument orders.
        self.assertEqual(dist_length('abc', 'dcba'), 0.25)
        self.assertEqual(dist_length('abcd', 'cba'), 0.25)
        self.assertEqual(dist_length('ab', 'dcba'), 0.5)
        self.assertEqual(dist_length('abcd', 'ba'), 0.5)
2082
2083
2084
class PrefixTestCases(unittest.TestCase):
    """Exercise the prefix similarity and distance functions.

    abydos.distance.sim_prefix & .dist_prefix
    """

    def test_sim_prefix(self):
        """Test abydos.distance.sim_prefix."""
        eq, approx = self.assertEqual, self.assertAlmostEqual
        # (assertion to apply, src, tar, expected similarity)
        expectations = (
            (eq, '', '', 1),
            (eq, 'a', '', 0),
            (eq, '', 'a', 0),
            (eq, 'a', 'a', 1),
            (eq, 'ax', 'a', 1),
            (eq, 'axx', 'a', 1),
            (eq, 'ax', 'ay', 1/2),
            (eq, 'a', 'ay', 1),
            (eq, 'a', 'ayy', 1),
            (eq, 'ax', 'ay', 1/2),
            (eq, 'a', 'y', 0),
            (eq, 'y', 'a', 0),
            (eq, 'aaax', 'aaa', 1),
            (approx, 'axxx', 'aaa', 1/3),
            (eq, 'aaxx', 'aayy', 1/2),
            (eq, 'xxaa', 'yyaa', 0),
            (approx, 'aaxxx', 'aay', 2/3),
            (eq, 'aaxxxx', 'aayyy', 2/5),
            (eq, 'xa', 'a', 0),
            (eq, 'xxa', 'a', 0),
            (eq, 'xa', 'ya', 0),
            (eq, 'a', 'ya', 0),
            (eq, 'a', 'yya', 0),
            (eq, 'xa', 'ya', 0),
            (eq, 'xaaa', 'aaa', 0),
            (eq, 'xxxa', 'aaa', 0),
            (eq, 'xxxaa', 'yaa', 0),
            (eq, 'xxxxaa', 'yyyaa', 0),
        )
        for check, src, tar, expected in expectations:
            check(sim_prefix(src, tar), expected)

    def test_dist_prefix(self):
        """Test abydos.distance.dist_prefix."""
        eq, approx = self.assertEqual, self.assertAlmostEqual
        # (assertion to apply, src, tar, expected distance)
        expectations = (
            (eq, '', '', 0),
            (eq, 'a', '', 1),
            (eq, '', 'a', 1),
            (eq, 'a', 'a', 0),
            (eq, 'ax', 'a', 0),
            (eq, 'axx', 'a', 0),
            (eq, 'ax', 'ay', 1/2),
            (eq, 'a', 'ay', 0),
            (eq, 'a', 'ayy', 0),
            (eq, 'ax', 'ay', 1/2),
            (eq, 'a', 'y', 1),
            (eq, 'y', 'a', 1),
            (eq, 'aaax', 'aaa', 0),
            (approx, 'axxx', 'aaa', 2/3),
            (eq, 'aaxx', 'aayy', 1/2),
            (eq, 'xxaa', 'yyaa', 1),
            (approx, 'aaxxx', 'aay', 1/3),
            (eq, 'aaxxxx', 'aayyy', 3/5),
            (eq, 'xa', 'a', 1),
            (eq, 'xxa', 'a', 1),
            (eq, 'xa', 'ya', 1),
            (eq, 'a', 'ya', 1),
            (eq, 'a', 'yya', 1),
            (eq, 'xa', 'ya', 1),
            (eq, 'xaaa', 'aaa', 1),
            (eq, 'xxxa', 'aaa', 1),
            (eq, 'xxxaa', 'yaa', 1),
            (eq, 'xxxxaa', 'yyyaa', 1),
        )
        for check, src, tar, expected in expectations:
            check(dist_prefix(src, tar), expected)
2151
2152
2153
class SuffixTestCases(unittest.TestCase):
    """Exercise the suffix similarity and distance functions.

    abydos.distance.sim_suffix & .dist_suffix
    """

    def test_sim_suffix(self):
        """Test abydos.distance.sim_suffix."""
        eq, approx = self.assertEqual, self.assertAlmostEqual
        # (assertion to apply, src, tar, expected similarity)
        expectations = (
            (eq, '', '', 1),
            (eq, 'a', '', 0),
            (eq, '', 'a', 0),
            (eq, 'a', 'a', 1),
            (eq, 'ax', 'a', 0),
            (eq, 'axx', 'a', 0),
            (eq, 'ax', 'ay', 0),
            (eq, 'a', 'ay', 0),
            (eq, 'a', 'ayy', 0),
            (eq, 'ax', 'ay', 0),
            (eq, 'a', 'y', 0),
            (eq, 'y', 'a', 0),
            (eq, 'aaax', 'aaa', 0),
            (eq, 'axxx', 'aaa', 0),
            (eq, 'aaxx', 'aayy', 0),
            (eq, 'xxaa', 'yyaa', 1/2),
            (eq, 'aaxxx', 'aay', 0),
            (eq, 'aaxxxx', 'aayyy', 0),
            (eq, 'xa', 'a', 1),
            (eq, 'xxa', 'a', 1),
            (eq, 'xa', 'ya', 1/2),
            (eq, 'a', 'ya', 1),
            (eq, 'a', 'yya', 1),
            (eq, 'xa', 'ya', 1/2),
            (eq, 'xaaa', 'aaa', 1),
            (approx, 'xxxa', 'aaa', 1/3),
            (approx, 'xxxaa', 'yaa', 2/3),
            (eq, 'xxxxaa', 'yyyaa', 2/5),
        )
        for check, src, tar, expected in expectations:
            check(sim_suffix(src, tar), expected)

    def test_dist_suffix(self):
        """Test abydos.distance.dist_suffix."""
        eq, approx = self.assertEqual, self.assertAlmostEqual
        # (assertion to apply, src, tar, expected distance)
        expectations = (
            (eq, '', '', 0),
            (eq, 'a', '', 1),
            (eq, '', 'a', 1),
            (eq, 'a', 'a', 0),
            (eq, 'ax', 'a', 1),
            (eq, 'axx', 'a', 1),
            (eq, 'ax', 'ay', 1),
            (eq, 'a', 'ay', 1),
            (eq, 'a', 'ayy', 1),
            (eq, 'ax', 'ay', 1),
            (eq, 'a', 'y', 1),
            (eq, 'y', 'a', 1),
            (eq, 'aaax', 'aaa', 1),
            (eq, 'axxx', 'aaa', 1),
            (eq, 'aaxx', 'aayy', 1),
            (eq, 'xxaa', 'yyaa', 1/2),
            (eq, 'aaxxx', 'aay', 1),
            (eq, 'aaxxxx', 'aayyy', 1),
            (eq, 'xa', 'a', 0),
            (eq, 'xxa', 'a', 0),
            (eq, 'xa', 'ya', 1/2),
            (eq, 'a', 'ya', 0),
            (eq, 'a', 'yya', 0),
            (eq, 'xa', 'ya', 1/2),
            (eq, 'xaaa', 'aaa', 0),
            (approx, 'xxxa', 'aaa', 2/3),
            (approx, 'xxxaa', 'yaa', 1/3),
            (eq, 'xxxxaa', 'yyyaa', 3/5),
        )
        for check, src, tar, expected in expectations:
            check(dist_suffix(src, tar), expected)
2220
2221
2222
class MLIPNSTestCases(unittest.TestCase):
    """Exercise the MLIPNS similarity and distance functions.

    abydos.distance.sim_mlipns & .dist_mlipns
    """

    def test_sim_mlipns(self):
        """Test abydos.distance.sim_mlipns."""
        # (src, tar, expected similarity)
        expectations = (
            ('', '', 1),
            ('a', '', 0),
            ('', 'a', 0),
            ('a', 'a', 1),
            ('ab', 'a', 1),
            ('abc', 'abc', 1),
            ('abc', 'abcde', 1),
            ('abcg', 'abcdeg', 1),
            ('abcg', 'abcdefg', 0),
            ('Tomato', 'Tamato', 1),
            ('ato', 'Tam', 1),
        )
        for src, tar, expected in expectations:
            self.assertEqual(sim_mlipns(src, tar), expected)

    def test_dist_mlipns(self):
        """Test abydos.distance.dist_mlipns."""
        # (src, tar, expected distance)
        expectations = (
            ('', '', 0),
            ('a', '', 1),
            ('', 'a', 1),
            ('a', 'a', 0),
            ('ab', 'a', 0),
            ('abc', 'abc', 0),
            ('abc', 'abcde', 0),
            ('abcg', 'abcdeg', 0),
            ('abcg', 'abcdefg', 1),
            ('Tomato', 'Tamato', 0),
            ('ato', 'Tam', 0),
        )
        for src, tar, expected in expectations:
            self.assertEqual(dist_mlipns(src, tar), expected)
2255
2256
2257
class BagTestCases(unittest.TestCase):
    """Exercise the bag distance and similarity functions.

    abydos.distance.bag, .sim_bag & .dist_bag
    """

    def test_bag(self):
        """Test abydos.distance.bag."""
        # (src, tar, expected bag distance)
        expectations = (
            ('', '', 0),
            ('nelson', '', 6),
            ('', 'neilsen', 7),
            ('ab', 'a', 1),
            ('ab', 'c', 2),
            ('nelson', 'neilsen', 2),
            ('neilsen', 'nelson', 2),
            ('niall', 'neal', 2),
            ('aluminum', 'Catalan', 5),
            ('abcdefg', 'hijklm', 7),
            ('abcdefg', 'hijklmno', 8),
        )
        for src, tar, expected in expectations:
            self.assertEqual(bag(src, tar), expected)

    def test_sim_bag(self):
        """Test abydos.distance.sim_bag."""
        eq, approx = self.assertEqual, self.assertAlmostEqual
        # (assertion to apply, src, tar, expected similarity)
        expectations = (
            (eq, '', '', 1),
            (eq, 'nelson', '', 0),
            (eq, '', 'neilsen', 0),
            (eq, 'ab', 'a', 0.5),
            (eq, 'ab', 'c', 0),
            (approx, 'nelson', 'neilsen', 5/7),
            (approx, 'neilsen', 'nelson', 5/7),
            (approx, 'niall', 'neal', 3/5),
            (approx, 'aluminum', 'Catalan', 3/8),
            (eq, 'abcdefg', 'hijklm', 0),
            (eq, 'abcdefg', 'hijklmno', 0),
        )
        for check, src, tar, expected in expectations:
            check(sim_bag(src, tar), expected)

    def test_dist_bag(self):
        """Test abydos.distance.dist_bag."""
        eq, approx = self.assertEqual, self.assertAlmostEqual
        # (assertion to apply, src, tar, expected distance)
        expectations = (
            (eq, '', '', 0),
            (eq, 'nelson', '', 1),
            (eq, '', 'neilsen', 1),
            (eq, 'ab', 'a', 0.5),
            (eq, 'ab', 'c', 1),
            (approx, 'nelson', 'neilsen', 2/7),
            (approx, 'neilsen', 'nelson', 2/7),
            (approx, 'niall', 'neal', 2/5),
            (approx, 'aluminum', 'Catalan', 5/8),
            (eq, 'abcdefg', 'hijklm', 1),
            (eq, 'abcdefg', 'hijklmno', 1),
        )
        for check, src, tar, expected in expectations:
            check(dist_bag(src, tar), expected)
2304
2305
2306
class EditexTestCases(unittest.TestCase):
    """Exercise the Editex distance and similarity functions.

    abydos.distance.editex, .sim_editex & .dist_editex
    """

    def test_editex(self):
        """Test abydos.distance.editex."""
        # (src, tar, expected Editex distance)
        expectations = (
            ('', '', 0),
            ('nelson', '', 12),
            ('', 'neilsen', 14),
            ('ab', 'a', 2),
            ('ab', 'c', 4),
            ('nelson', 'neilsen', 2),
            ('neilsen', 'nelson', 2),
            ('niall', 'neal', 1),
            ('neal', 'niall', 1),
            ('niall', 'nihal', 2),
            ('nihal', 'niall', 2),
            ('neal', 'nihl', 3),
            ('nihl', 'neal', 3),
        )
        for src, tar, expected in expectations:
            self.assertEqual(editex(src, tar), expected)

    def test_editex_local(self):
        """Test abydos.distance.editex (local variant)."""
        # Same pairs as test_editex; only the ('ab', 'c') expectation
        # differs (2 instead of 4).
        expectations = (
            ('', '', 0),
            ('nelson', '', 12),
            ('', 'neilsen', 14),
            ('ab', 'a', 2),
            ('ab', 'c', 2),
            ('nelson', 'neilsen', 2),
            ('neilsen', 'nelson', 2),
            ('niall', 'neal', 1),
            ('neal', 'niall', 1),
            ('niall', 'nihal', 2),
            ('nihal', 'niall', 2),
            ('neal', 'nihl', 3),
            ('nihl', 'neal', 3),
        )
        for src, tar, expected in expectations:
            self.assertEqual(editex(src, tar, local=True), expected)

    def test_sim_editex(self):
        """Test abydos.distance.sim_editex."""
        eq, approx = self.assertEqual, self.assertAlmostEqual
        # (assertion to apply, src, tar, expected similarity)
        expectations = (
            (eq, '', '', 1),
            (eq, 'nelson', '', 0),
            (eq, '', 'neilsen', 0),
            (eq, 'ab', 'a', 0.5),
            (eq, 'ab', 'c', 0),
            (approx, 'nelson', 'neilsen', 12/14),
            (approx, 'neilsen', 'nelson', 12/14),
            (eq, 'niall', 'neal', 0.9),
        )
        for check, src, tar, expected in expectations:
            check(sim_editex(src, tar), expected)

    def test_dist_editex(self):
        """Test abydos.distance.dist_editex."""
        eq, approx = self.assertEqual, self.assertAlmostEqual
        # (assertion to apply, src, tar, expected distance)
        expectations = (
            (eq, '', '', 0),
            (eq, 'nelson', '', 1),
            (eq, '', 'neilsen', 1),
            (eq, 'ab', 'a', 0.5),
            (eq, 'ab', 'c', 1),
            (approx, 'nelson', 'neilsen', 2/14),
            (approx, 'neilsen', 'nelson', 2/14),
            (eq, 'niall', 'neal', 0.1),
        )
        for check, src, tar, expected in expectations:
            check(dist_editex(src, tar), expected)
2365
2366
class EudexTestCases(unittest.TestCase):
    """Test Eudex distance functions.

    abydos.distance.eudex_hamming, dist_eudex, & sim_eudex
    """

    def test_eudex_hamming(self):
        """Test abydos.distance.eudex_hamming.

        Each word pair is checked with the default weights, with None,
        with 'fibonacci', with an explicit list, with a callable that
        produces a generator, and with normalized=True.
        """
        def ten_ones():
            """Return a new generator that yields 1 ten times.

            This replaces ``lambda: [(yield 1) for _ in range(10)]``: a
            ``yield`` inside a list comprehension is a SyntaxError from
            Python 3.8 on. A generator expression produces the same ten
            values and is valid on both Python 2 and Python 3.
            """
            return (1 for _ in range(10))

        # Base cases
        self.assertEqual(eudex_hamming('', ''), 0)
        self.assertEqual(eudex_hamming('', '', None), 0)
        self.assertEqual(eudex_hamming('', '', 'fibonacci'), 0)
        self.assertEqual(eudex_hamming('', '', [10, 1, 1, 1]), 0)
        self.assertEqual(eudex_hamming('', '', ten_ones), 0)
        self.assertEqual(eudex_hamming('', '', normalized=True), 0)

        # Identical words
        self.assertEqual(eudex_hamming('Niall', 'Niall'), 0)
        self.assertEqual(eudex_hamming('Niall', 'Niall', None), 0)
        self.assertEqual(eudex_hamming('Niall', 'Niall', 'fibonacci'), 0)
        self.assertEqual(eudex_hamming('Niall', 'Niall', [10, 1, 1, 1]), 0)
        self.assertEqual(eudex_hamming('Niall', 'Niall', ten_ones), 0)
        self.assertEqual(eudex_hamming('Niall', 'Niall', normalized=True), 0)

        # Different words
        self.assertEqual(eudex_hamming('Niall', 'Neil'), 2)
        self.assertEqual(eudex_hamming('Niall', 'Neil', None), 1)
        self.assertEqual(eudex_hamming('Niall', 'Neil', 'fibonacci'), 2)
        self.assertEqual(eudex_hamming('Niall', 'Neil', [10, 1, 1, 1]), 1)
        self.assertEqual(eudex_hamming('Niall', 'Neil', ten_ones), 1)
        self.assertAlmostEqual(eudex_hamming('Niall', 'Neil', normalized=True),
                               0.00098039)

        self.assertEqual(eudex_hamming('Niall', 'Colin'), 524)
        self.assertEqual(eudex_hamming('Niall', 'Colin', None), 10)
        self.assertEqual(eudex_hamming('Niall', 'Colin', 'fibonacci'), 146)
        self.assertEqual(eudex_hamming('Niall', 'Colin', [10, 1, 1, 1]), 6)
        self.assertEqual(eudex_hamming('Niall', 'Colin', ten_ones), 10)
        self.assertAlmostEqual(eudex_hamming('Niall', 'Colin',
                                             normalized=True), 0.25686274)

    def test_dist_eudex(self):
        """Test abydos.distance.dist_eudex."""
        # Base cases
        self.assertEqual(dist_eudex('', ''), 0)
        self.assertEqual(dist_eudex('', '', None), 0)
        self.assertEqual(dist_eudex('', '', 'fibonacci'), 0)

        # Identical words
        self.assertEqual(dist_eudex('Niall', 'Niall'), 0)
        self.assertEqual(dist_eudex('Niall', 'Niall', None), 0)
        self.assertEqual(dist_eudex('Niall', 'Niall', 'fibonacci'), 0)

        # Different words; results are floats, so compare approximately.
        self.assertAlmostEqual(dist_eudex('Niall', 'Neil'), 0.00098039)
        self.assertAlmostEqual(dist_eudex('Niall', 'Neil', None), 0.11111111)
        self.assertAlmostEqual(dist_eudex('Niall', 'Neil', 'fibonacci'),
                               0.00287356)

        self.assertAlmostEqual(dist_eudex('Niall', 'Colin'), 0.25686275)
        self.assertAlmostEqual(dist_eudex('Niall', 'Colin', None), 0.16666667)
        self.assertAlmostEqual(dist_eudex('Niall', 'Colin', 'fibonacci'),
                               0.20977011)

    def test_sim_eudex(self):
        """Test abydos.distance.sim_eudex."""
        # Base cases
        self.assertEqual(sim_eudex('', ''), 1)
        self.assertEqual(sim_eudex('', '', None), 1)
        self.assertEqual(sim_eudex('', '', 'fibonacci'), 1)

        # Identical words
        self.assertEqual(sim_eudex('Niall', 'Niall'), 1)
        self.assertEqual(sim_eudex('Niall', 'Niall', None), 1)
        self.assertEqual(sim_eudex('Niall', 'Niall', 'fibonacci'), 1)

        # Different words; results are floats, so compare approximately.
        self.assertAlmostEqual(sim_eudex('Niall', 'Neil'), 0.99901961)
        self.assertAlmostEqual(sim_eudex('Niall', 'Neil', None), 0.88888889)
        self.assertAlmostEqual(sim_eudex('Niall', 'Neil', 'fibonacci'),
                               0.99712644)

        self.assertAlmostEqual(sim_eudex('Niall', 'Colin'), 0.74313725)
        self.assertAlmostEqual(sim_eudex('Niall', 'Colin', None), 0.83333333)
        self.assertAlmostEqual(sim_eudex('Niall', 'Colin', 'fibonacci'),
                               0.79022989)
2454
2455
2456
class Sift4TestCases(unittest.TestCase):
    """Test Sift4 functions.

    abydos.distance.sift4_simplest, sift4_common, dist_sift4, & sim_sift4
    """

    def test_sift4_simplest(self):
        """Test abydos.distance.sift4_simplest."""
        # tests copied from Lukas Benedix's post at
        # https://siderite.blogspot.com/2014/11/super-fast-and-accurate-string-distance.html
        self.assertEqual(sift4_simplest('', ''), 0)
        self.assertEqual(sift4_simplest('a', ''), 1)
        self.assertEqual(sift4_simplest('', 'a'), 1)
        self.assertEqual(sift4_simplest('abc', ''), 3)
        self.assertEqual(sift4_simplest('', 'abc'), 3)

        self.assertEqual(sift4_simplest('a', 'a'), 0)
        self.assertEqual(sift4_simplest('abc', 'abc'), 0)

        self.assertEqual(sift4_simplest('a', 'ab'), 1)
        self.assertEqual(sift4_simplest('ac', 'abc'), 1)
        self.assertEqual(sift4_simplest('abcdefg', 'xabxcdxxefxgx'), 10)

        self.assertEqual(sift4_simplest('ab', 'b'), 1)
        self.assertEqual(sift4_simplest('ab', 'a'), 1)
        self.assertEqual(sift4_simplest('abc', 'ac'), 1)
        self.assertEqual(sift4_simplest('xabxcdxxefxgx', 'abcdefg'), 10)

        self.assertEqual(sift4_simplest('a', 'b'), 1)
        self.assertEqual(sift4_simplest('ab', 'ac'), 1)
        self.assertEqual(sift4_simplest('ac', 'bc'), 1)
        self.assertEqual(sift4_simplest('abc', 'axc'), 1)
        self.assertEqual(sift4_simplest('xabxcdxxefxgx', '1ab2cd34ef5g6'), 6)

        self.assertEqual(sift4_simplest('example', 'samples'), 2)
        self.assertEqual(sift4_simplest('sturgeon', 'urgently'), 4)
        self.assertEqual(sift4_simplest('levenshtein', 'frankenstein'), 10)
        self.assertEqual(sift4_simplest('distance', 'difference'), 7)

        # Tests copied from
        # https://github.com/tdebatty/java-string-similarity/blob/master/src/test/java/info/debatty/java/stringsimilarity/experimental/Sift4Test.java
        self.assertEqual(sift4_simplest('This is the first string',
                                        'And this is another string', 5), 13)
        self.assertEqual(sift4_simplest('Lorem ipsum dolor sit amet, ' +
                                        'consectetur adipiscing elit.',
                                        'Amet Lorm ispum dolor sit amet, ' +
                                        'consetetur adixxxpiscing elit.',
                                        10), 20)

    def test_sift4_common(self):
        """Test abydos.distance.sift4_common."""
        # tests copied from Lukas Benedix's post at
        # https://siderite.blogspot.com/2014/11/super-fast-and-accurate-string-distance.html
        self.assertEqual(sift4_common('', ''), 0)
        self.assertEqual(sift4_common('a', ''), 1)
        self.assertEqual(sift4_common('', 'a'), 1)
        self.assertEqual(sift4_common('abc', ''), 3)
        self.assertEqual(sift4_common('', 'abc'), 3)

        self.assertEqual(sift4_common('a', 'a'), 0)
        self.assertEqual(sift4_common('abc', 'abc'), 0)

        self.assertEqual(sift4_common('a', 'ab'), 1)
        self.assertEqual(sift4_common('ac', 'abc'), 1)
        self.assertEqual(sift4_common('abcdefg', 'xabxcdxxefxgx'), 7)

        self.assertEqual(sift4_common('ab', 'b'), 1)
        self.assertEqual(sift4_common('ab', 'a'), 1)
        self.assertEqual(sift4_common('abc', 'ac'), 1)
        self.assertEqual(sift4_common('xabxcdxxefxgx', 'abcdefg'), 7)

        self.assertEqual(sift4_common('a', 'b'), 1)
        self.assertEqual(sift4_common('ab', 'ac'), 1)
        self.assertEqual(sift4_common('ac', 'bc'), 1)
        self.assertEqual(sift4_common('abc', 'axc'), 1)
        self.assertEqual(sift4_common('xabxcdxxefxgx', '1ab2cd34ef5g6'), 6)

        self.assertEqual(sift4_common('example', 'samples'), 2)
        self.assertEqual(sift4_common('sturgeon', 'urgently'), 3)
        self.assertEqual(sift4_common('levenshtein', 'frankenstein'), 6)
        self.assertEqual(sift4_common('distance', 'difference'), 5)

        # Tests copied from
        # https://github.com/tdebatty/java-string-similarity/blob/master/src/test/java/info/debatty/java/stringsimilarity/experimental/Sift4Test.java
        self.assertEqual(sift4_common('This is the first string',
                                      'And this is another string', 5), 11)
        self.assertEqual(sift4_common('Lorem ipsum dolor sit amet, ' +
                                      'consectetur adipiscing elit.',
                                      'Amet Lorm ispum dolor sit amet, ' +
                                      'consetetur adixxxpiscing elit.',
                                      10), 12)

        # cases with max_distance
        self.assertEqual(sift4_common('example', 'samples', 5, 5), 5)
        self.assertEqual(sift4_common('sturgeon', 'urgently', 5, 5), 5)
        self.assertEqual(sift4_common('levenshtein', 'frankenstein', 5, 5), 5)
        self.assertEqual(sift4_common('distance', 'difference', 5, 5), 5)

    def test_dist_sift4(self):
        """Test abydos.distance.dist_sift4.

        Expected values of 1/3 are compared with assertAlmostEqual because
        1/3 is not exactly representable as a float (the module uses true
        division via ``from __future__ import division``); this mirrors
        how test_sim_sift4 treats 2/3.
        """
        # tests copied from Lukas Benedix's post at
        # https://siderite.blogspot.com/2014/11/super-fast-and-accurate-string-distance.html
        self.assertEqual(dist_sift4('', ''), 0)
        self.assertEqual(dist_sift4('a', ''), 1)
        self.assertEqual(dist_sift4('', 'a'), 1)
        self.assertEqual(dist_sift4('abc', ''), 1)
        self.assertEqual(dist_sift4('', 'abc'), 1)

        self.assertEqual(dist_sift4('a', 'a'), 0)
        self.assertEqual(dist_sift4('abc', 'abc'), 0)

        self.assertEqual(dist_sift4('a', 'ab'), 0.5)
        self.assertAlmostEqual(dist_sift4('ac', 'abc'), 1/3)
        self.assertAlmostEqual(dist_sift4('abcdefg', 'xabxcdxxefxgx'),
                               0.538461538)

        self.assertEqual(dist_sift4('ab', 'b'), 0.5)
        self.assertEqual(dist_sift4('ab', 'a'), 0.5)
        self.assertAlmostEqual(dist_sift4('abc', 'ac'), 1/3)
        self.assertAlmostEqual(dist_sift4('xabxcdxxefxgx', 'abcdefg'),
                               0.538461538)

        self.assertEqual(dist_sift4('a', 'b'), 1)
        self.assertEqual(dist_sift4('ab', 'ac'), 0.5)
        self.assertEqual(dist_sift4('ac', 'bc'), 0.5)
        self.assertAlmostEqual(dist_sift4('abc', 'axc'), 1/3)
        self.assertAlmostEqual(dist_sift4('xabxcdxxefxgx', '1ab2cd34ef5g6'),
                               0.461538461)

        self.assertAlmostEqual(dist_sift4('example', 'samples'), 0.285714285)
        self.assertAlmostEqual(dist_sift4('sturgeon', 'urgently'), 0.375)
        self.assertAlmostEqual(dist_sift4('levenshtein', 'frankenstein'), 0.5)
        self.assertAlmostEqual(dist_sift4('distance', 'difference'), 0.5)

        # Tests copied from
        # https://github.com/tdebatty/java-string-similarity/blob/master/src/test/java/info/debatty/java/stringsimilarity/experimental/Sift4Test.java
        self.assertAlmostEqual(dist_sift4('This is the first string',
                                          'And this is another string',
                                          5), 0.423076923)
        self.assertAlmostEqual(dist_sift4('Lorem ipsum dolor sit amet, ' +
                                          'consectetur adipiscing elit.',
                                          'Amet Lorm ispum dolor sit amet, ' +
                                          'consetetur adixxxpiscing elit.',
                                          10), 0.193548387)

        # cases with max_distance
        self.assertAlmostEqual(dist_sift4('example', 'samples', 5, 5),
                               0.714285714)
        self.assertAlmostEqual(dist_sift4('sturgeon', 'urgently', 5, 5), 0.625)
        self.assertAlmostEqual(dist_sift4('levenshtein', 'frankenstein', 5, 5),
                               0.416666666)
        self.assertAlmostEqual(dist_sift4('distance', 'difference', 5, 5), 0.5)

    def test_sim_sift4(self):
        """Test abydos.distance.sim_sift4."""
        # tests copied from Lukas Benedix's post at
        # https://siderite.blogspot.com/2014/11/super-fast-and-accurate-string-distance.html
        self.assertEqual(sim_sift4('', ''), 1)
        self.assertEqual(sim_sift4('a', ''), 0)
        self.assertEqual(sim_sift4('', 'a'), 0)
        self.assertEqual(sim_sift4('abc', ''), 0)
        self.assertEqual(sim_sift4('', 'abc'), 0)

        self.assertEqual(sim_sift4('a', 'a'), 1)
        self.assertEqual(sim_sift4('abc', 'abc'), 1)

        self.assertEqual(sim_sift4('a', 'ab'), 0.5)
        self.assertAlmostEqual(sim_sift4('ac', 'abc'), 2/3)
        self.assertAlmostEqual(sim_sift4('abcdefg', 'xabxcdxxefxgx'),
                               0.461538461)

        self.assertEqual(sim_sift4('ab', 'b'), 0.5)
        self.assertEqual(sim_sift4('ab', 'a'), 0.5)
        self.assertAlmostEqual(sim_sift4('abc', 'ac'), 2/3)
        self.assertAlmostEqual(sim_sift4('xabxcdxxefxgx', 'abcdefg'),
                               0.461538461)

        self.assertEqual(sim_sift4('a', 'b'), 0)
        self.assertEqual(sim_sift4('ab', 'ac'), 0.5)
        self.assertEqual(sim_sift4('ac', 'bc'), 0.5)
        self.assertAlmostEqual(sim_sift4('abc', 'axc'), 2/3)
        self.assertAlmostEqual(sim_sift4('xabxcdxxefxgx', '1ab2cd34ef5g6'),
                               0.538461538)

        self.assertAlmostEqual(sim_sift4('example', 'samples'), 0.714285714)
        self.assertAlmostEqual(sim_sift4('sturgeon', 'urgently'), 0.625)
        self.assertAlmostEqual(sim_sift4('levenshtein', 'frankenstein'), 0.5)
        self.assertAlmostEqual(sim_sift4('distance', 'difference'), 0.5)

        # Tests copied from
        # https://github.com/tdebatty/java-string-similarity/blob/master/src/test/java/info/debatty/java/stringsimilarity/experimental/Sift4Test.java
        self.assertAlmostEqual(sim_sift4('This is the first string',
                                         'And this is another string',
                                         5), 0.576923077)
        self.assertAlmostEqual(sim_sift4('Lorem ipsum dolor sit amet, ' +
                                         'consectetur adipiscing elit.',
                                         'Amet Lorm ispum dolor sit amet, ' +
                                         'consetetur adixxxpiscing elit.',
                                         10), 0.806451613)

        # cases with max_distance
        self.assertAlmostEqual(sim_sift4('example', 'samples', 5, 5),
                               0.285714286)
        self.assertAlmostEqual(sim_sift4('sturgeon', 'urgently', 5, 5), 0.375)
        self.assertAlmostEqual(sim_sift4('levenshtein', 'frankenstein', 5, 5),
                               0.583333333)
        self.assertAlmostEqual(sim_sift4('distance', 'difference', 5, 5), 0.5)
2663
2664
2665
class BaystatTestCases(unittest.TestCase):
    """Test Baystat functions.

    abydos.distance.sim_baystat & .dist_baystat
    """

    def test_sim_baystat(self):
        """Test abydos.distance.sim_baystat."""
        # Base cases
        self.assertEqual(sim_baystat('', ''), 1)
        self.assertEqual(sim_baystat('Colin', ''), 0)
        self.assertEqual(sim_baystat('Colin', 'Colin'), 1)

        # Examples given in the paper
        # https://www.statistik.bayern.de/medien/statistik/zensus/zusammenf__hrung_von_datenbest__nden_ohne_numerische_identifikatoren.pdf
        self.assertAlmostEqual(sim_baystat('DRAKOMENA', 'DRAOMINA'), 7/9)
        self.assertAlmostEqual(sim_baystat('RIEKI', 'RILKI'), 4/5)
        self.assertAlmostEqual(sim_baystat('ATANASSIONI', 'ATANASIOU'), 8/11)
        self.assertAlmostEqual(sim_baystat('LIESKOVSKY', 'LIESZKOVSZKY'),
                               10/12)
        self.assertAlmostEqual(sim_baystat('JEANETTE', 'JEANNETTE'), 8/9)
        self.assertAlmostEqual(sim_baystat('JOHANNES', 'JOHAN'), 0.625)
        self.assertAlmostEqual(sim_baystat('JOHANNES', 'HANS'), 0.375)
        self.assertAlmostEqual(sim_baystat('JOHANNES', 'HANNES'), 0.75)
        self.assertAlmostEqual(sim_baystat('ZIMMERMANN', 'SEMMERMANN'), 0.8)
        self.assertAlmostEqual(sim_baystat('ZIMMERMANN', 'ZIMMERER'), 0.6)
        self.assertAlmostEqual(sim_baystat('ZIMMERMANN', 'ZIMMER'), 0.6)

    def test_dist_baystat(self):
        """Test abydos.distance.dist_baystat."""
        # Base cases
        self.assertEqual(dist_baystat('', ''), 0)
        self.assertEqual(dist_baystat('Colin', ''), 1)
        self.assertEqual(dist_baystat('Colin', 'Colin'), 0)

        # Examples given in the paper
        # https://www.statistik.bayern.de/medien/statistik/zensus/zusammenf__hrung_von_datenbest__nden_ohne_numerische_identifikatoren.pdf
        self.assertAlmostEqual(dist_baystat('DRAKOMENA', 'DRAOMINA'), 2/9)
        self.assertAlmostEqual(dist_baystat('RIEKI', 'RILKI'), 1/5)
        self.assertAlmostEqual(dist_baystat('ATANASSIONI', 'ATANASIOU'), 3/11)
        self.assertAlmostEqual(dist_baystat('LIESKOVSKY', 'LIESZKOVSZKY'),
                               2/12)
        self.assertAlmostEqual(dist_baystat('JEANETTE', 'JEANNETTE'), 1/9)
        self.assertAlmostEqual(dist_baystat('JOHANNES', 'JOHAN'), 0.375)
        self.assertAlmostEqual(dist_baystat('JOHANNES', 'HANS'), 0.625)
        self.assertAlmostEqual(dist_baystat('JOHANNES', 'HANNES'), 0.25)
        self.assertAlmostEqual(dist_baystat('ZIMMERMANN', 'SEMMERMANN'), 0.2)
        self.assertAlmostEqual(dist_baystat('ZIMMERMANN', 'ZIMMERER'), 0.4)
        self.assertAlmostEqual(dist_baystat('ZIMMERMANN', 'ZIMMER'), 0.4)
2714
2715
2716
class TypoTestCases(unittest.TestCase):
    """Test Typo functions.

    abydos.distance.typo, sim_typo & .dist_typo

    Only the empty-string base case of each function is covered so far.
    """

    def test_typo(self):
        """Test abydos.distance.typo."""
        # Base cases
        self.assertEqual(typo('', ''), 0)

    def test_sim_typo(self):
        """Test abydos.distance.sim_typo."""
        # Base cases
        self.assertEqual(sim_typo('', ''), 1)

    def test_dist_typo(self):
        """Test abydos.distance.dist_typo."""
        # Base cases
        self.assertEqual(dist_typo('', ''), 0)
2736
2737
2738
class IndelTestCases(unittest.TestCase):
    """Test indel functions.

    abydos.distance.sim_indel & .dist_indel

    Only the empty-string base case of each function is covered so far.
    """

    def test_sim_indel(self):
        """Test abydos.distance.sim_indel."""
        # Base cases
        self.assertEqual(sim_indel('', ''), 1)

    def test_dist_indel(self):
        """Test abydos.distance.dist_indel."""
        # Base cases
        self.assertEqual(dist_indel('', ''), 0)
2753
2754
2755
class SynonameTestCases(unittest.TestCase):
    """Test Synoname functions.

    abydos.distance._synoname_strip_punct, synoname_word_approximation, &
    synoname

    Only the empty-string base case of each function is covered so far.
    """

    def test_synoname_strip_punct(self):
        """Test abydos.distance._synoname_strip_punct."""
        # Base cases
        self.assertEqual(_synoname_strip_punct(''), '')

    def test_synoname_word_approximation(self):
        """Test abydos.distance.synoname_word_approximation."""
        # Base cases
        self.assertEqual(synoname_word_approximation('', ''), 0)

    def test_synoname(self):
        """Test abydos.distance.synoname."""
        # Base cases
        self.assertEqual(synoname('', ''), 1)
2776
2777
2778
class SimDistTestCases(unittest.TestCase):
    """Test generic sim & dist functions.

    abydos.distance.sim & .dist
    """

    def test_sim(self):
        """Test abydos.distance.sim."""
        # Called with only two strings, sim must agree with sim_levenshtein
        self.assertEqual(sim('Niall', 'Nigel'),
                         sim_levenshtein('Niall', 'Nigel'))
        # An integer third argument is expected to raise AttributeError
        self.assertRaises(AttributeError, sim, 'abc', 'abc', 0)

    def test_dist(self):
        """Test abydos.distance.dist."""
        # Called with only two strings, dist must agree with dist_levenshtein
        self.assertEqual(dist('Niall', 'Nigel'),
                         dist_levenshtein('Niall', 'Nigel'))
        # An integer third argument is expected to raise AttributeError
        self.assertRaises(AttributeError, dist, 'abc', 'abc', 0)
2795
2796
2797
if __name__ == '__main__':
    # Run every test case in this module when executed as a script.
    unittest.main()
2799