Completed
Push — master ( 470d5e...802be5 )
by Chris
13:49
created

LcsseqTestCases.test_sim_lcsseq()   A

Complexity

Conditions 1

Size

Total Lines 34
Code Lines 26

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 26
nop 1
dl 0
loc 34
rs 9.256
c 0
b 0
f 0
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2014-2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19
"""abydos.tests.test_distance.
20
21
This module contains unit tests for abydos.distance
22
"""
23
24
from __future__ import division, unicode_literals
25
26
import math
27
import os
28
import pkgutil
29
import sys
30
import unittest
31
from difflib import SequenceMatcher
32
33
from abydos.compression import ac_train
34
from abydos.distance import _synoname_strip_punct, bag, chebyshev, \
35
    damerau_levenshtein, dist, dist_bag, dist_baystat, dist_chebyshev, \
36
    dist_compression, dist_cosine, dist_damerau, dist_dice, dist_editex, \
37
    dist_euclidean, dist_eudex, dist_hamming, dist_ident, dist_indel, \
38
    dist_jaccard, dist_jaro_winkler, dist_lcsseq, dist_lcsstr, dist_length, \
39
    dist_levenshtein, dist_manhattan, dist_minkowski, dist_mlipns, \
40
    dist_monge_elkan, dist_mra, dist_overlap, dist_prefix, \
41
    dist_ratcliff_obershelp, dist_sift4, dist_strcmp95, dist_suffix, \
42
    dist_tversky, dist_typo, editex, euclidean, eudex_hamming, gotoh, \
43
    hamming, lcsseq, lcsstr, levenshtein, manhattan, minkowski, mra_compare, \
44
    needleman_wunsch, sift4_common, sift4_simplest, sim, sim_bag, \
45
    sim_baystat, sim_chebyshev, sim_compression, sim_cosine, sim_damerau, \
46
    sim_dice, sim_editex, sim_euclidean, sim_eudex, sim_hamming, sim_ident, \
47
    sim_indel, sim_jaccard, sim_jaro_winkler, sim_lcsseq, sim_lcsstr, \
48
    sim_length, sim_levenshtein, sim_manhattan, sim_matrix, sim_minkowski, \
49
    sim_mlipns, sim_monge_elkan, sim_mra, sim_overlap, sim_prefix, \
50
    sim_ratcliff_obershelp, sim_sift4, sim_strcmp95, sim_suffix, \
51
    sim_tanimoto, sim_tversky, sim_typo, smith_waterman, synoname, \
52
    synoname_word_approximation, tanimoto, typo
53
from abydos.qgram import QGrams
54
55
from six.moves import range
56
57
# Directory containing this test module.
TESTDIR = os.path.dirname(__file__)

# Spelling variants and relatives of the name "Niall", shared test data.
NIALL = ('Niall', 'Neal', 'Neil', 'Njall', 'Njáll', 'Nigel', 'Neel', 'Nele',
         'Nigelli', 'Nel', 'Kneale', 'Uí Néill', 'O\'Neill', 'MacNeil',
         'MacNele', 'Niall Noígíallach')

# Spelling variants and relatives of the name "Colin", shared test data.
COLIN = ('Colin', 'Collin', 'Cullen', 'Cuilen', 'Cailean', 'MacCailean',
         'Cuilén', 'Colle', 'Calum', 'Callum', 'Colinn', 'Colon', 'Colynn',
         'Col', 'Cole', 'Nicolas', 'Nicholas', 'Cailean Mór Caimbeul')
66
67
68
class LevenshteinTestCases(unittest.TestCase):
    """Test Levenshtein functions.

    abydos.distance.levenshtein, .dist_levenshtein, .sim_levenshtein,
    .damerau_levenshtein, .dist_damerau, & .sim_damerau
    """

    def test_levenshtein(self):
        """Test abydos.distance.levenshtein.

        Covers the default mode, the 'lev', 'osa' & 'dam' modes, and the
        4-tuple ``cost`` argument.
        """
        self.assertEqual(levenshtein('', ''), 0)

        # http://oldfashionedsoftware.com/tag/levenshtein-distance/
        self.assertEqual(levenshtein('a', ''), 1)
        self.assertEqual(levenshtein('', 'a'), 1)
        self.assertEqual(levenshtein('abc', ''), 3)
        self.assertEqual(levenshtein('', 'abc'), 3)
        self.assertEqual(levenshtein('', ''), 0)
        self.assertEqual(levenshtein('a', 'a'), 0)
        self.assertEqual(levenshtein('abc', 'abc'), 0)
        self.assertEqual(levenshtein('', 'a'), 1)
        self.assertEqual(levenshtein('a', 'ab'), 1)
        self.assertEqual(levenshtein('b', 'ab'), 1)
        self.assertEqual(levenshtein('ac', 'abc'), 1)
        self.assertEqual(levenshtein('abcdefg', 'xabxcdxxefxgx'), 6)
        self.assertEqual(levenshtein('a', ''), 1)
        self.assertEqual(levenshtein('ab', 'a'), 1)
        self.assertEqual(levenshtein('ab', 'b'), 1)
        self.assertEqual(levenshtein('abc', 'ac'), 1)
        self.assertEqual(levenshtein('xabxcdxxefxgx', 'abcdefg'), 6)
        self.assertEqual(levenshtein('a', 'b'), 1)
        self.assertEqual(levenshtein('ab', 'ac'), 1)
        self.assertEqual(levenshtein('ac', 'bc'), 1)
        self.assertEqual(levenshtein('abc', 'axc'), 1)
        self.assertEqual(levenshtein('xabxcdxxefxgx', '1ab2cd34ef5g6'), 6)
        self.assertEqual(levenshtein('example', 'samples'), 3)
        self.assertEqual(levenshtein('sturgeon', 'urgently'), 6)
        self.assertEqual(levenshtein('levenshtein', 'frankenstein'), 6)
        self.assertEqual(levenshtein('distance', 'difference'), 5)
        self.assertEqual(levenshtein('java was neat', 'scala is great'), 7)

        # https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance
        self.assertEqual(levenshtein('CA', 'ABC', 'dam'), 2)
        self.assertEqual(levenshtein('CA', 'ABC', 'osa'), 3)

        # test cost of insert
        self.assertEqual(levenshtein('', 'b', 'lev', cost=(5, 7, 10, 10)), 5)
        self.assertEqual(levenshtein('', 'b', 'osa', cost=(5, 7, 10, 10)), 5)
        self.assertEqual(levenshtein('', 'b', 'dam', cost=(5, 7, 10, 10)), 5)
        self.assertEqual(levenshtein('a', 'ab', 'lev', cost=(5, 7, 10, 10)), 5)
        self.assertEqual(levenshtein('a', 'ab', 'osa', cost=(5, 7, 10, 10)), 5)
        self.assertEqual(levenshtein('a', 'ab', 'dam', cost=(5, 7, 10, 10)), 5)

        # test cost of delete
        self.assertEqual(levenshtein('b', '', 'lev', cost=(5, 7, 10, 10)), 7)
        self.assertEqual(levenshtein('b', '', 'osa', cost=(5, 7, 10, 10)), 7)
        self.assertEqual(levenshtein('b', '', 'dam', cost=(5, 7, 10, 10)), 7)
        self.assertEqual(levenshtein('ab', 'a', 'lev', cost=(5, 7, 10, 10)), 7)
        self.assertEqual(levenshtein('ab', 'a', 'osa', cost=(5, 7, 10, 10)), 7)
        self.assertEqual(levenshtein('ab', 'a', 'dam', cost=(5, 7, 10, 10)), 7)

        # test cost of substitute
        self.assertEqual(levenshtein('a', 'b', 'lev', cost=(10, 10, 5, 10)), 5)
        self.assertEqual(levenshtein('a', 'b', 'osa', cost=(10, 10, 5, 10)), 5)
        self.assertEqual(levenshtein('a', 'b', 'dam', cost=(10, 10, 5, 10)), 5)
        self.assertEqual(levenshtein('ac', 'bc', 'lev',
                                     cost=(10, 10, 5, 10)), 5)
        self.assertEqual(levenshtein('ac', 'bc', 'osa',
                                     cost=(10, 10, 5, 10)), 5)
        self.assertEqual(levenshtein('ac', 'bc', 'dam',
                                     cost=(10, 10, 5, 10)), 5)

        # test cost of transpose
        # ('lev' has no transposition, so a swap costs two edits; the 'dam'
        # cases use a different cost tuple because the one used for
        # 'lev'/'osa' raises ValueError in 'dam' mode -- see below)
        self.assertEqual(levenshtein('ab', 'ba', 'lev',
                                     cost=(10, 10, 10, 5)), 20)
        self.assertEqual(levenshtein('ab', 'ba', 'osa',
                                     cost=(10, 10, 10, 5)), 5)
        self.assertEqual(levenshtein('ab', 'ba', 'dam',
                                     cost=(5, 5, 10, 5)), 5)
        self.assertEqual(levenshtein('abc', 'bac', 'lev',
                                     cost=(10, 10, 10, 5)), 20)
        self.assertEqual(levenshtein('abc', 'bac', 'osa',
                                     cost=(10, 10, 10, 5)), 5)
        self.assertEqual(levenshtein('abc', 'bac', 'dam',
                                     cost=(5, 5, 10, 5)), 5)
        self.assertEqual(levenshtein('cab', 'cba', 'lev',
                                     cost=(10, 10, 10, 5)), 20)
        self.assertEqual(levenshtein('cab', 'cba', 'osa',
                                     cost=(10, 10, 10, 5)), 5)
        self.assertEqual(levenshtein('cab', 'cba', 'dam',
                                     cost=(5, 5, 10, 5)), 5)

        # test exception
        # NOTE(review): presumably 'dam' mode rejects cost tuples where
        # 2 * transpose cost < insert cost + delete cost -- confirm against
        # the levenshtein documentation.
        self.assertRaises(ValueError, levenshtein, 'ab', 'ba', 'dam',
                          cost=(10, 10, 10, 5))

    def test_dist_levenshtein(self):
        """Test abydos.distance.dist_levenshtein."""
        self.assertEqual(dist_levenshtein('', ''), 0)

        self.assertEqual(dist_levenshtein('a', 'a'), 0)
        self.assertEqual(dist_levenshtein('ab', 'ab'), 0)
        self.assertEqual(dist_levenshtein('', 'a'), 1)
        self.assertEqual(dist_levenshtein('', 'ab'), 1)
        self.assertEqual(dist_levenshtein('a', 'c'), 1)

        self.assertAlmostEqual(dist_levenshtein('abc', 'ac'), 1/3)
        self.assertAlmostEqual(dist_levenshtein('abbc', 'ac'), 1/2)
        self.assertAlmostEqual(dist_levenshtein('abbc', 'abc'), 1/4)

    def test_sim_levenshtein(self):
        """Test abydos.distance.sim_levenshtein."""
        self.assertEqual(sim_levenshtein('', ''), 1)

        self.assertEqual(sim_levenshtein('a', 'a'), 1)
        self.assertEqual(sim_levenshtein('ab', 'ab'), 1)
        self.assertEqual(sim_levenshtein('', 'a'), 0)
        self.assertEqual(sim_levenshtein('', 'ab'), 0)
        self.assertEqual(sim_levenshtein('a', 'c'), 0)

        self.assertAlmostEqual(sim_levenshtein('abc', 'ac'), 2/3)
        self.assertAlmostEqual(sim_levenshtein('abbc', 'ac'), 1/2)
        self.assertAlmostEqual(sim_levenshtein('abbc', 'abc'), 3/4)

    def test_damerau_levenshtein(self):
        """Test abydos.distance.damerau_levenshtein."""
        self.assertEqual(damerau_levenshtein('', ''), 0)
        self.assertEqual(damerau_levenshtein('CA', 'CA'), 0)
        self.assertEqual(damerau_levenshtein('CA', 'ABC'), 2)
        self.assertEqual(damerau_levenshtein('', 'b', cost=(5, 7, 10, 10)), 5)
        self.assertEqual(damerau_levenshtein('a', 'ab', cost=(5, 7, 10, 10)),
                         5)
        self.assertEqual(damerau_levenshtein('b', '', cost=(5, 7, 10, 10)), 7)
        self.assertEqual(damerau_levenshtein('ab', 'a', cost=(5, 7, 10, 10)),
                         7)
        self.assertEqual(damerau_levenshtein('a', 'b', cost=(10, 10, 5, 10)),
                         5)
        self.assertEqual(damerau_levenshtein('ac', 'bc',
                                             cost=(10, 10, 5, 10)), 5)
        self.assertEqual(damerau_levenshtein('ab', 'ba',
                                             cost=(5, 5, 10, 5)), 5)
        self.assertEqual(damerau_levenshtein('abc', 'bac',
                                             cost=(5, 5, 10, 5)), 5)
        self.assertEqual(damerau_levenshtein('cab', 'cba',
                                             cost=(5, 5, 10, 5)), 5)
        # this cost tuple is expected to be rejected
        self.assertRaises(ValueError, damerau_levenshtein, 'ab', 'ba',
                          cost=(10, 10, 10, 5))

    def test_dist_damerau(self):
        """Test abydos.distance.dist_damerau."""
        self.assertEqual(dist_damerau('', ''), 0)

        self.assertEqual(dist_damerau('a', 'a'), 0)
        self.assertEqual(dist_damerau('ab', 'ab'), 0)
        self.assertEqual(dist_damerau('', 'a'), 1)
        self.assertEqual(dist_damerau('', 'ab'), 1)
        self.assertEqual(dist_damerau('a', 'c'), 1)

        self.assertAlmostEqual(dist_damerau('abc', 'ac'), 1/3)
        self.assertAlmostEqual(dist_damerau('abbc', 'ac'), 1/2)
        self.assertAlmostEqual(dist_damerau('abbc', 'abc'), 1/4)

        self.assertAlmostEqual(dist_damerau('CA', 'ABC'), 2/3)
        self.assertAlmostEqual(dist_damerau('', 'b', cost=(5, 7, 10, 10)), 1)
        self.assertAlmostEqual(dist_damerau('a', 'ab',
                                            cost=(5, 7, 10, 10)), 1/2)
        self.assertAlmostEqual(dist_damerau('b', '', cost=(5, 7, 10, 10)), 1)
        self.assertAlmostEqual(dist_damerau('ab', 'a',
                                            cost=(5, 7, 10, 10)), 1/2)
        self.assertAlmostEqual(dist_damerau('a', 'b',
                                            cost=(10, 10, 5, 10)), 1/2)
        self.assertAlmostEqual(dist_damerau('ac', 'bc',
                                            cost=(10, 10, 5, 10)), 1/4)
        self.assertAlmostEqual(dist_damerau('ab', 'ba',
                                            cost=(5, 5, 10, 5)), 1/2)
        self.assertAlmostEqual(dist_damerau('abc', 'bac',
                                            cost=(5, 5, 10, 5)), 1/3)
        self.assertAlmostEqual(dist_damerau('cab', 'cba',
                                            cost=(5, 5, 10, 5)), 1/3)
        # this cost tuple is expected to be rejected
        self.assertRaises(ValueError, dist_damerau, 'ab', 'ba',
                          cost=(10, 10, 10, 5))

    def test_sim_damerau(self):
        """Test abydos.distance.sim_damerau."""
        self.assertEqual(sim_damerau('', ''), 1)

        self.assertEqual(sim_damerau('a', 'a'), 1)
        self.assertEqual(sim_damerau('ab', 'ab'), 1)
        self.assertEqual(sim_damerau('', 'a'), 0)
        self.assertEqual(sim_damerau('', 'ab'), 0)
        self.assertEqual(sim_damerau('a', 'c'), 0)

        self.assertAlmostEqual(sim_damerau('abc', 'ac'), 2/3)
        self.assertAlmostEqual(sim_damerau('abbc', 'ac'), 1/2)
        self.assertAlmostEqual(sim_damerau('abbc', 'abc'), 3/4)

        self.assertAlmostEqual(sim_damerau('CA', 'ABC'), 1/3)
        self.assertAlmostEqual(sim_damerau('', 'b', cost=(5, 7, 10, 10)), 0)
        self.assertAlmostEqual(sim_damerau('a', 'ab', cost=(5, 7, 10, 10)),
                               1/2)
        self.assertAlmostEqual(sim_damerau('b', '', cost=(5, 7, 10, 10)), 0)
        self.assertAlmostEqual(sim_damerau('ab', 'a', cost=(5, 7, 10, 10)),
                               1/2)
        self.assertAlmostEqual(sim_damerau('a', 'b', cost=(10, 10, 5, 10)),
                               1/2)
        self.assertAlmostEqual(sim_damerau('ac', 'bc',
                                           cost=(10, 10, 5, 10)), 3/4)
        self.assertAlmostEqual(sim_damerau('ab', 'ba',
                                           cost=(5, 5, 10, 5)), 1/2)
        self.assertAlmostEqual(sim_damerau('abc', 'bac',
                                           cost=(5, 5, 10, 5)), 2/3)
        self.assertAlmostEqual(sim_damerau('cab', 'cba',
                                           cost=(5, 5, 10, 5)), 2/3)
        # this cost tuple is expected to be rejected
        self.assertRaises(ValueError, sim_damerau, 'ab', 'ba',
                          cost=(10, 10, 10, 5))
282
283
284
class HammingTestCases(unittest.TestCase):
    """Test Hamming functions.

    abydos.distance.hamming, .dist_hamming, & .sim_hamming
    """

    def test_hamming(self):
        """Test abydos.distance.hamming.

        The optional third positional argument is exercised with False;
        with it, unequal-length inputs are expected to raise ValueError.
        """
        self.assertEqual(hamming('', ''), 0)
        self.assertEqual(hamming('', '', False), 0)

        self.assertEqual(hamming('a', ''), 1)
        self.assertEqual(hamming('a', 'a'), 0)
        self.assertEqual(hamming('a', 'a', False), 0)
        self.assertEqual(hamming('a', 'b'), 1)
        self.assertEqual(hamming('a', 'b', False), 1)
        self.assertEqual(hamming('abc', 'cba'), 2)
        self.assertEqual(hamming('abc', 'cba', False), 2)
        self.assertEqual(hamming('abc', ''), 3)
        self.assertEqual(hamming('bb', 'cbab'), 3)

        # test exception
        self.assertRaises(ValueError, hamming, 'ab', 'a', False)

        # https://en.wikipedia.org/wiki/Hamming_distance
        self.assertEqual(hamming('karolin', 'kathrin'), 3)
        self.assertEqual(hamming('karolin', 'kerstin'), 3)
        self.assertEqual(hamming('1011101', '1001001'), 2)
        self.assertEqual(hamming('2173896', '2233796'), 3)

    def test_dist_hamming(self):
        """Test abydos.distance.dist_hamming."""
        self.assertEqual(dist_hamming('', ''), 0)
        self.assertEqual(dist_hamming('', '', False), 0)

        self.assertEqual(dist_hamming('a', ''), 1)
        self.assertEqual(dist_hamming('a', 'a'), 0)
        self.assertEqual(dist_hamming('a', 'a', False), 0)
        self.assertEqual(dist_hamming('a', 'b'), 1)
        self.assertEqual(dist_hamming('a', 'b', False), 1)
        self.assertAlmostEqual(dist_hamming('abc', 'cba'), 2/3)
        self.assertAlmostEqual(dist_hamming('abc', 'cba', False), 2/3)
        self.assertEqual(dist_hamming('abc', ''), 1)
        self.assertAlmostEqual(dist_hamming('bb', 'cbab'), 3/4)

        # test exception
        self.assertRaises(ValueError, dist_hamming, 'ab', 'a', False)

        # https://en.wikipedia.org/wiki/Hamming_distance
        self.assertAlmostEqual(dist_hamming('karolin', 'kathrin'), 3/7)
        self.assertAlmostEqual(dist_hamming('karolin', 'kerstin'), 3/7)
        self.assertAlmostEqual(dist_hamming('1011101', '1001001'), 2/7)
        self.assertAlmostEqual(dist_hamming('2173896', '2233796'), 3/7)

    def test_sim_hamming(self):
        """Test abydos.distance.sim_hamming."""
        self.assertEqual(sim_hamming('', ''), 1)
        self.assertEqual(sim_hamming('', '', False), 1)

        self.assertEqual(sim_hamming('a', ''), 0)
        self.assertEqual(sim_hamming('a', 'a'), 1)
        self.assertEqual(sim_hamming('a', 'a', False), 1)
        self.assertEqual(sim_hamming('a', 'b'), 0)
        self.assertEqual(sim_hamming('a', 'b', False), 0)
        self.assertAlmostEqual(sim_hamming('abc', 'cba'), 1/3)
        self.assertAlmostEqual(sim_hamming('abc', 'cba', False), 1/3)
        self.assertEqual(sim_hamming('abc', ''), 0)
        self.assertAlmostEqual(sim_hamming('bb', 'cbab'), 1/4)

        # test exception
        self.assertRaises(ValueError, sim_hamming, 'ab', 'a', False)

        # https://en.wikipedia.org/wiki/Hamming_distance
        self.assertAlmostEqual(sim_hamming('karolin', 'kathrin'), 4/7)
        self.assertAlmostEqual(sim_hamming('karolin', 'kerstin'), 4/7)
        self.assertAlmostEqual(sim_hamming('1011101', '1001001'), 5/7)
        self.assertAlmostEqual(sim_hamming('2173896', '2233796'), 4/7)
361
362
363
# Sentence pair used by the "non-q-gram tests" (q-gram size argument of 0).
NONQ_FROM = 'The quick brown fox jumped over the lazy dog.'
NONQ_TO = 'That brown dog jumped over the fox.'
365
366
367
class TverskyIndexTestCases(unittest.TestCase):
    """Test Tversky functions.

    abydos.distance.sim_tversky & .dist_tversky
    """

    def test_sim_tversky(self):
        """Test abydos.distance.sim_tversky.

        Positional arguments after the two inputs are, in order, the q-gram
        size, alpha, beta, and the bias parameter.
        """
        self.assertEqual(sim_tversky('', ''), 1)
        self.assertEqual(sim_tversky('nelson', ''), 0)
        self.assertEqual(sim_tversky('', 'neilsen'), 0)
        self.assertAlmostEqual(sim_tversky('nelson', 'neilsen'), 4/11)

        self.assertEqual(sim_tversky('', '', 2), 1)
        self.assertEqual(sim_tversky('nelson', '', 2), 0)
        self.assertEqual(sim_tversky('', 'neilsen', 2), 0)
        self.assertAlmostEqual(sim_tversky('nelson', 'neilsen', 2), 4/11)

        # test valid alpha & beta
        self.assertRaises(ValueError, sim_tversky, 'abcd', 'dcba', 2, -1, -1)
        self.assertRaises(ValueError, sim_tversky, 'abcd', 'dcba', 2, -1, 0)
        self.assertRaises(ValueError, sim_tversky, 'abcd', 'dcba', 2, 0, -1)

        # test empty QGrams
        self.assertAlmostEqual(sim_tversky('nelson', 'neilsen', 7), 0.0)

        # test unequal alpha & beta
        self.assertAlmostEqual(sim_tversky('niall', 'neal', 2, 2, 1), 3/11)
        self.assertAlmostEqual(sim_tversky('niall', 'neal', 2, 1, 2), 3/10)
        self.assertAlmostEqual(sim_tversky('niall', 'neal', 2, 2, 2), 3/13)

        # test bias parameter
        self.assertAlmostEqual(sim_tversky('niall', 'neal', 2, 1, 1, 0.5),
                               7/11)
        self.assertAlmostEqual(sim_tversky('niall', 'neal', 2, 2, 1, 0.5), 7/9)
        self.assertAlmostEqual(sim_tversky('niall', 'neal', 2, 1, 2, 0.5),
                               7/15)
        self.assertAlmostEqual(sim_tversky('niall', 'neal', 2, 2, 2, 0.5),
                               7/11)

        # supplied q-gram tests
        self.assertEqual(sim_tversky(QGrams(''), QGrams('')), 1)
        self.assertEqual(sim_tversky(QGrams('nelson'), QGrams('')), 0)
        self.assertEqual(sim_tversky(QGrams(''), QGrams('neilsen')), 0)
        self.assertAlmostEqual(sim_tversky(QGrams('nelson'),
                                           QGrams('neilsen')), 4/11)

        # non-q-gram tests
        self.assertEqual(sim_tversky('', '', 0), 1)
        self.assertEqual(sim_tversky('the quick', '', 0), 0)
        self.assertEqual(sim_tversky('', 'the quick', 0), 0)
        self.assertAlmostEqual(sim_tversky(NONQ_FROM, NONQ_TO, 0), 1/3)
        self.assertAlmostEqual(sim_tversky(NONQ_TO, NONQ_FROM, 0), 1/3)

    def test_dist_tversky(self):
        """Test abydos.distance.dist_tversky.

        Mirrors test_sim_tversky; each expected value is the complement
        (1 - similarity) of the corresponding similarity expectation.
        """
        self.assertEqual(dist_tversky('', ''), 0)
        self.assertEqual(dist_tversky('nelson', ''), 1)
        self.assertEqual(dist_tversky('', 'neilsen'), 1)
        self.assertAlmostEqual(dist_tversky('nelson', 'neilsen'), 7/11)

        self.assertEqual(dist_tversky('', '', 2), 0)
        self.assertEqual(dist_tversky('nelson', '', 2), 1)
        self.assertEqual(dist_tversky('', 'neilsen', 2), 1)
        self.assertAlmostEqual(dist_tversky('nelson', 'neilsen', 2), 7/11)

        # test valid alpha & beta
        self.assertRaises(ValueError, dist_tversky, 'abcd', 'dcba', 2, -1, -1)
        self.assertRaises(ValueError, dist_tversky, 'abcd', 'dcba', 2, -1, 0)
        self.assertRaises(ValueError, dist_tversky, 'abcd', 'dcba', 2, 0, -1)

        # test empty QGrams
        self.assertAlmostEqual(dist_tversky('nelson', 'neilsen', 7), 1.0)

        # test unequal alpha & beta
        self.assertAlmostEqual(dist_tversky('niall', 'neal', 2, 2, 1), 8/11)
        self.assertAlmostEqual(dist_tversky('niall', 'neal', 2, 1, 2), 7/10)
        self.assertAlmostEqual(dist_tversky('niall', 'neal', 2, 2, 2), 10/13)

        # test bias parameter
        self.assertAlmostEqual(dist_tversky('niall', 'neal', 2, 1, 1, 0.5),
                               4/11)
        self.assertAlmostEqual(dist_tversky('niall', 'neal', 2, 2, 1, 0.5),
                               2/9)
        self.assertAlmostEqual(dist_tversky('niall', 'neal', 2, 1, 2, 0.5),
                               8/15)
        self.assertAlmostEqual(dist_tversky('niall', 'neal', 2, 2, 2, 0.5),
                               4/11)

        # supplied q-gram tests
        self.assertEqual(dist_tversky(QGrams(''), QGrams('')), 0)
        self.assertEqual(dist_tversky(QGrams('nelson'), QGrams('')), 1)
        self.assertEqual(dist_tversky(QGrams(''), QGrams('neilsen')), 1)
        self.assertAlmostEqual(dist_tversky(QGrams('nelson'),
                                            QGrams('neilsen')), 7/11)

        # non-q-gram tests
        self.assertEqual(dist_tversky('', '', 0), 0)
        self.assertEqual(dist_tversky('the quick', '', 0), 1)
        self.assertEqual(dist_tversky('', 'the quick', 0), 1)
        self.assertAlmostEqual(dist_tversky(NONQ_FROM, NONQ_TO, 0), 2/3)
        self.assertAlmostEqual(dist_tversky(NONQ_TO, NONQ_FROM, 0), 2/3)
469
470
471
class DiceTestCases(unittest.TestCase):
    """Test Dice functions.

    abydos.distance.sim_dice & .dist_dice
    """

    def test_sim_dice(self):
        """Test abydos.distance.sim_dice.

        Inputs are given as plain strings (default & explicit q-gram size),
        as pre-built QGrams objects, and with a q-gram size of 0.
        """
        self.assertEqual(sim_dice('', ''), 1)
        self.assertEqual(sim_dice('nelson', ''), 0)
        self.assertEqual(sim_dice('', 'neilsen'), 0)
        self.assertAlmostEqual(sim_dice('nelson', 'neilsen'), 8/15)

        self.assertEqual(sim_dice('', '', 2), 1)
        self.assertEqual(sim_dice('nelson', '', 2), 0)
        self.assertEqual(sim_dice('', 'neilsen', 2), 0)
        self.assertAlmostEqual(sim_dice('nelson', 'neilsen', 2), 8/15)

        # supplied q-gram tests
        self.assertEqual(sim_dice(QGrams(''), QGrams('')), 1)
        self.assertEqual(sim_dice(QGrams('nelson'), QGrams('')), 0)
        self.assertEqual(sim_dice(QGrams(''), QGrams('neilsen')), 0)
        self.assertAlmostEqual(sim_dice(QGrams('nelson'), QGrams('neilsen')),
                               8/15)

        # non-q-gram tests
        self.assertEqual(sim_dice('', '', 0), 1)
        self.assertEqual(sim_dice('the quick', '', 0), 0)
        self.assertEqual(sim_dice('', 'the quick', 0), 0)
        self.assertAlmostEqual(sim_dice(NONQ_FROM, NONQ_TO, 0), 1/2)
        self.assertAlmostEqual(sim_dice(NONQ_TO, NONQ_FROM, 0), 1/2)

    def test_dist_dice(self):
        """Test abydos.distance.dist_dice.

        Mirrors test_sim_dice; each expected value is the complement
        (1 - similarity) of the corresponding similarity expectation.
        """
        self.assertEqual(dist_dice('', ''), 0)
        self.assertEqual(dist_dice('nelson', ''), 1)
        self.assertEqual(dist_dice('', 'neilsen'), 1)
        self.assertAlmostEqual(dist_dice('nelson', 'neilsen'), 7/15)

        self.assertEqual(dist_dice('', '', 2), 0)
        self.assertEqual(dist_dice('nelson', '', 2), 1)
        self.assertEqual(dist_dice('', 'neilsen', 2), 1)
        self.assertAlmostEqual(dist_dice('nelson', 'neilsen', 2), 7/15)

        # supplied q-gram tests
        self.assertEqual(dist_dice(QGrams(''), QGrams('')), 0)
        self.assertEqual(dist_dice(QGrams('nelson'), QGrams('')), 1)
        self.assertEqual(dist_dice(QGrams(''), QGrams('neilsen')), 1)
        self.assertAlmostEqual(dist_dice(QGrams('nelson'), QGrams('neilsen')),
                               7/15)

        # non-q-gram tests
        self.assertEqual(dist_dice('', '', 0), 0)
        self.assertEqual(dist_dice('the quick', '', 0), 1)
        self.assertEqual(dist_dice('', 'the quick', 0), 1)
        self.assertAlmostEqual(dist_dice(NONQ_FROM, NONQ_TO, 0), 1/2)
        self.assertAlmostEqual(dist_dice(NONQ_TO, NONQ_FROM, 0), 1/2)
528
529
530
class JaccardTestCases(unittest.TestCase):
    """Test Jaccard functions.

    abydos.distance.sim_jaccard & .dist_jaccard
    """

    def test_sim_jaccard(self):
        """Test abydos.distance.sim_jaccard.

        Inputs are given as plain strings (default & explicit q-gram size),
        as pre-built QGrams objects, and with a q-gram size of 0.
        """
        self.assertEqual(sim_jaccard('', ''), 1)
        self.assertEqual(sim_jaccard('nelson', ''), 0)
        self.assertEqual(sim_jaccard('', 'neilsen'), 0)
        self.assertAlmostEqual(sim_jaccard('nelson', 'neilsen'), 4/11)

        self.assertEqual(sim_jaccard('', '', 2), 1)
        self.assertEqual(sim_jaccard('nelson', '', 2), 0)
        self.assertEqual(sim_jaccard('', 'neilsen', 2), 0)
        self.assertAlmostEqual(sim_jaccard('nelson', 'neilsen', 2), 4/11)

        # supplied q-gram tests
        self.assertEqual(sim_jaccard(QGrams(''), QGrams('')), 1)
        self.assertEqual(sim_jaccard(QGrams('nelson'), QGrams('')), 0)
        self.assertEqual(sim_jaccard(QGrams(''), QGrams('neilsen')), 0)
        self.assertAlmostEqual(sim_jaccard(QGrams('nelson'),
                                           QGrams('neilsen')), 4/11)

        # non-q-gram tests
        self.assertEqual(sim_jaccard('', '', 0), 1)
        self.assertEqual(sim_jaccard('the quick', '', 0), 0)
        self.assertEqual(sim_jaccard('', 'the quick', 0), 0)
        self.assertAlmostEqual(sim_jaccard(NONQ_FROM, NONQ_TO, 0), 1/3)
        self.assertAlmostEqual(sim_jaccard(NONQ_TO, NONQ_FROM, 0), 1/3)

    def test_dist_jaccard(self):
        """Test abydos.distance.dist_jaccard.

        Mirrors test_sim_jaccard; each expected value is the complement
        (1 - similarity) of the corresponding similarity expectation.
        """
        self.assertEqual(dist_jaccard('', ''), 0)
        self.assertEqual(dist_jaccard('nelson', ''), 1)
        self.assertEqual(dist_jaccard('', 'neilsen'), 1)
        self.assertAlmostEqual(dist_jaccard('nelson', 'neilsen'), 7/11)

        self.assertEqual(dist_jaccard('', '', 2), 0)
        self.assertEqual(dist_jaccard('nelson', '', 2), 1)
        self.assertEqual(dist_jaccard('', 'neilsen', 2), 1)
        self.assertAlmostEqual(dist_jaccard('nelson', 'neilsen', 2), 7/11)

        # supplied q-gram tests
        self.assertEqual(dist_jaccard(QGrams(''), QGrams('')), 0)
        self.assertEqual(dist_jaccard(QGrams('nelson'), QGrams('')), 1)
        self.assertEqual(dist_jaccard(QGrams(''), QGrams('neilsen')), 1)
        self.assertAlmostEqual(dist_jaccard(QGrams('nelson'),
                                            QGrams('neilsen')), 7/11)

        # non-q-gram tests
        self.assertEqual(dist_jaccard('', '', 0), 0)
        self.assertEqual(dist_jaccard('the quick', '', 0), 1)
        self.assertEqual(dist_jaccard('', 'the quick', 0), 1)
        self.assertAlmostEqual(dist_jaccard(NONQ_FROM, NONQ_TO, 0), 2/3)
        self.assertAlmostEqual(dist_jaccard(NONQ_TO, NONQ_FROM, 0), 2/3)
587
588
589
class OverlapTestCases(unittest.TestCase):
    """Test overlap functions.

    abydos.distance.sim_overlap & .dist_overlap
    """

    def test_sim_overlap(self):
        """Test abydos.distance.sim_overlap.

        Inputs are given as plain strings (default & explicit q-gram size),
        as pre-built QGrams objects, and with a q-gram size of 0.
        """
        self.assertEqual(sim_overlap('', ''), 1)
        self.assertEqual(sim_overlap('nelson', ''), 0)
        self.assertEqual(sim_overlap('', 'neilsen'), 0)
        self.assertAlmostEqual(sim_overlap('nelson', 'neilsen'), 4/7)

        self.assertEqual(sim_overlap('', '', 2), 1)
        self.assertEqual(sim_overlap('nelson', '', 2), 0)
        self.assertEqual(sim_overlap('', 'neilsen', 2), 0)
        self.assertAlmostEqual(sim_overlap('nelson', 'neilsen', 2), 4/7)

        # supplied q-gram tests
        self.assertEqual(sim_overlap(QGrams(''), QGrams('')), 1)
        self.assertEqual(sim_overlap(QGrams('nelson'), QGrams('')), 0)
        self.assertEqual(sim_overlap(QGrams(''), QGrams('neilsen')), 0)
        self.assertAlmostEqual(sim_overlap(QGrams('nelson'),
                                           QGrams('neilsen')), 4/7)

        # non-q-gram tests
        self.assertEqual(sim_overlap('', '', 0), 1)
        self.assertEqual(sim_overlap('the quick', '', 0), 0)
        self.assertEqual(sim_overlap('', 'the quick', 0), 0)
        self.assertAlmostEqual(sim_overlap(NONQ_FROM, NONQ_TO, 0), 4/7)
        self.assertAlmostEqual(sim_overlap(NONQ_TO, NONQ_FROM, 0), 4/7)

    def test_dist_overlap(self):
        """Test abydos.distance.dist_overlap.

        Mirrors test_sim_overlap; each expected value is the complement
        (1 - similarity) of the corresponding similarity expectation.
        """
        self.assertEqual(dist_overlap('', ''), 0)
        self.assertEqual(dist_overlap('nelson', ''), 1)
        self.assertEqual(dist_overlap('', 'neilsen'), 1)
        self.assertAlmostEqual(dist_overlap('nelson', 'neilsen'), 3/7)

        self.assertEqual(dist_overlap('', '', 2), 0)
        self.assertEqual(dist_overlap('nelson', '', 2), 1)
        self.assertEqual(dist_overlap('', 'neilsen', 2), 1)
        self.assertAlmostEqual(dist_overlap('nelson', 'neilsen', 2), 3/7)

        # supplied q-gram tests
        self.assertEqual(dist_overlap(QGrams(''), QGrams('')), 0)
        self.assertEqual(dist_overlap(QGrams('nelson'), QGrams('')), 1)
        self.assertEqual(dist_overlap(QGrams(''), QGrams('neilsen')), 1)
        self.assertAlmostEqual(dist_overlap(QGrams('nelson'),
                                            QGrams('neilsen')), 3/7)

        # non-q-gram tests
        self.assertEqual(dist_overlap('', '', 0), 0)
        self.assertEqual(dist_overlap('the quick', '', 0), 1)
        self.assertEqual(dist_overlap('', 'the quick', 0), 1)
        self.assertAlmostEqual(dist_overlap(NONQ_FROM, NONQ_TO, 0), 3/7)
        self.assertAlmostEqual(dist_overlap(NONQ_TO, NONQ_FROM, 0), 3/7)
646
647
648
class TanimotoTestCases(unittest.TestCase):
    """Test cases for the Tanimoto functions.

    Covers abydos.distance.sim_tanimoto & .tanimoto
    """

    def test_tanimoto_coeff(self):
        """Verify abydos.distance.sim_tanimoto."""
        eq = self.assertEqual
        near = self.assertAlmostEqual
        coeff = 4/11

        # plain strings: default third argument first, then an explicit 2
        for extra in ((), (2,)):
            eq(sim_tanimoto('', '', *extra), 1)
            eq(sim_tanimoto('nelson', '', *extra), 0)
            eq(sim_tanimoto('', 'neilsen', *extra), 0)
            near(sim_tanimoto('nelson', 'neilsen', *extra), coeff)

        # supplied q-gram tests
        for src, tar, expected in (('', '', 1),
                                   ('nelson', '', 0),
                                   ('', 'neilsen', 0)):
            eq(sim_tanimoto(QGrams(src), QGrams(tar)), expected)
        near(sim_tanimoto(QGrams('nelson'), QGrams('neilsen')), coeff)

        # non-q-gram tests
        for src, tar, expected in (('', '', 1),
                                   ('the quick', '', 0),
                                   ('', 'the quick', 0)):
            eq(sim_tanimoto(src, tar, 0), expected)
        for src, tar in ((NONQ_FROM, NONQ_TO), (NONQ_TO, NONQ_FROM)):
            near(sim_tanimoto(src, tar, 0), 1/3)

    def test_tanimoto(self):
        """Verify abydos.distance.tanimoto."""
        eq = self.assertEqual
        near = self.assertAlmostEqual
        neg_inf = float('-inf')
        log_coeff = math.log(4/11, 2)

        # plain strings: default third argument first, then an explicit 2
        for extra in ((), (2,)):
            eq(tanimoto('', '', *extra), 0)
            eq(tanimoto('nelson', '', *extra), neg_inf)
            eq(tanimoto('', 'neilsen', *extra), neg_inf)
            near(tanimoto('nelson', 'neilsen', *extra), log_coeff)

        # supplied q-gram tests
        for src, tar, expected in (('', '', 0),
                                   ('nelson', '', neg_inf),
                                   ('', 'neilsen', neg_inf)):
            eq(tanimoto(QGrams(src), QGrams(tar)), expected)
        near(tanimoto(QGrams('nelson'), QGrams('neilsen')), log_coeff)

        # non-q-gram tests
        for src, tar, expected in (('', '', 0),
                                   ('the quick', '', neg_inf),
                                   ('', 'the quick', neg_inf)):
            eq(tanimoto(src, tar, 0), expected)
        for src, tar in ((NONQ_FROM, NONQ_TO), (NONQ_TO, NONQ_FROM)):
            near(tanimoto(src, tar, 0), math.log(1/3, 2))
710
711
712
class MinkowskiTestCases(unittest.TestCase):
    """Test cases for the Minkowski functions.

    Covers abydos.distance.minkowski, sim_minkowski & .dist_minkowski
    """

    def test_minkowski(self):
        """Verify abydos.distance.minkowski."""
        eq = self.assertEqual
        near = self.assertAlmostEqual

        # plain strings: default third argument first, then an explicit 2
        for extra in ((), (2,)):
            eq(minkowski('', '', *extra), 0)
            eq(minkowski('nelson', '', *extra), 7)
            eq(minkowski('', 'neilsen', *extra), 8)
            near(minkowski('nelson', 'neilsen', *extra), 7)

        # supplied q-gram tests
        for src, tar, expected in (('', '', 0),
                                   ('nelson', '', 7),
                                   ('', 'neilsen', 8)):
            eq(minkowski(QGrams(src), QGrams(tar)), expected)
        near(minkowski(QGrams('nelson'), QGrams('neilsen')), 7)

        # non-q-gram tests
        for src, tar, expected in (('', '', 0),
                                   ('the quick', '', 2),
                                   ('', 'the quick', 2)):
            eq(minkowski(src, tar, 0), expected)
        for src, tar in ((NONQ_FROM, NONQ_TO), (NONQ_TO, NONQ_FROM)):
            near(minkowski(src, tar, 0), 8)

        # test l_0 "norm"
        for src, tar, expected in (('', '', 0),
                                   ('a', '', 1),
                                   ('a', 'b', 2),
                                   ('ab', 'b', 1),
                                   ('aab', 'b', 1)):
            eq(minkowski(src, tar, 1, 0), expected)
        # same norm, with a fifth positional argument of True
        for src, tar, expected in (('', '', 0),
                                   ('a', '', 1),
                                   ('a', 'b', 1),
                                   ('ab', 'b', 1/2),
                                   ('aab', 'b', 1/2),
                                   ('aaab', 'b', 1/2),
                                   ('aaab', 'ab', 1/2)):
            eq(minkowski(src, tar, 1, 0, True), expected)

        # test with alphabet
        eq(minkowski('ab', 'b', 1, alphabet=26), 1)
        for letters in (26, 'abcdefghijklmnopqrstuvwxyz'):
            eq(minkowski('ab', 'b', 1, normalize=True, alphabet=letters),
               1/26)

    def test_sim_minkowski(self):
        """Verify abydos.distance.sim_minkowski."""
        eq = self.assertEqual
        near = self.assertAlmostEqual
        partial_sim = 8/15

        # plain strings: default third argument first, then an explicit 2
        for extra in ((), (2,)):
            eq(sim_minkowski('', '', *extra), 1)
            eq(sim_minkowski('nelson', '', *extra), 0)
            eq(sim_minkowski('', 'neilsen', *extra), 0)
            near(sim_minkowski('nelson', 'neilsen', *extra), partial_sim)

        # supplied q-gram tests
        for src, tar, expected in (('', '', 1),
                                   ('nelson', '', 0),
                                   ('', 'neilsen', 0)):
            eq(sim_minkowski(QGrams(src), QGrams(tar)), expected)
        near(sim_minkowski(QGrams('nelson'), QGrams('neilsen')), partial_sim)

        # non-q-gram tests
        for src, tar, expected in (('', '', 1),
                                   ('the quick', '', 0),
                                   ('', 'the quick', 0)):
            eq(sim_minkowski(src, tar, 0), expected)
        for src, tar in ((NONQ_FROM, NONQ_TO), (NONQ_TO, NONQ_FROM)):
            near(sim_minkowski(src, tar, 0), 1/2)

    def test_dist_minkowski(self):
        """Verify abydos.distance.dist_minkowski."""
        eq = self.assertEqual
        near = self.assertAlmostEqual
        partial_dist = 7/15

        # plain strings: default third argument first, then an explicit 2
        for extra in ((), (2,)):
            eq(dist_minkowski('', '', *extra), 0)
            eq(dist_minkowski('nelson', '', *extra), 1)
            eq(dist_minkowski('', 'neilsen', *extra), 1)
            near(dist_minkowski('nelson', 'neilsen', *extra), partial_dist)

        # supplied q-gram tests
        for src, tar, expected in (('', '', 0),
                                   ('nelson', '', 1),
                                   ('', 'neilsen', 1)):
            eq(dist_minkowski(QGrams(src), QGrams(tar)), expected)
        near(dist_minkowski(QGrams('nelson'), QGrams('neilsen')),
             partial_dist)

        # non-q-gram tests
        for src, tar, expected in (('', '', 0),
                                   ('the quick', '', 1),
                                   ('', 'the quick', 1)):
            eq(dist_minkowski(src, tar, 0), expected)
        for src, tar in ((NONQ_FROM, NONQ_TO), (NONQ_TO, NONQ_FROM)):
            near(dist_minkowski(src, tar, 0), 1/2)
817
818
819
class ManhattanTestCases(unittest.TestCase):
    """Test cases for the Manhattan functions.

    Covers abydos.distance.manhattan, sim_manhattan & .dist_manhattan
    """

    def test_manhattan(self):
        """Verify abydos.distance.manhattan."""
        eq = self.assertEqual
        near = self.assertAlmostEqual

        # plain strings: default third argument first, then an explicit 2
        for extra in ((), (2,)):
            eq(manhattan('', '', *extra), 0)
            eq(manhattan('nelson', '', *extra), 7)
            eq(manhattan('', 'neilsen', *extra), 8)
            near(manhattan('nelson', 'neilsen', *extra), 7)

        # supplied q-gram tests
        for src, tar, expected in (('', '', 0),
                                   ('nelson', '', 7),
                                   ('', 'neilsen', 8)):
            eq(manhattan(QGrams(src), QGrams(tar)), expected)
        near(manhattan(QGrams('nelson'), QGrams('neilsen')), 7)

        # non-q-gram tests
        for src, tar, expected in (('', '', 0),
                                   ('the quick', '', 2),
                                   ('', 'the quick', 2)):
            eq(manhattan(src, tar, 0), expected)
        for src, tar in ((NONQ_FROM, NONQ_TO), (NONQ_TO, NONQ_FROM)):
            near(manhattan(src, tar, 0), 8)

    def test_sim_manhattan(self):
        """Verify abydos.distance.sim_manhattan."""
        eq = self.assertEqual
        near = self.assertAlmostEqual
        partial_sim = 8/15

        # plain strings: default third argument first, then an explicit 2
        for extra in ((), (2,)):
            eq(sim_manhattan('', '', *extra), 1)
            eq(sim_manhattan('nelson', '', *extra), 0)
            eq(sim_manhattan('', 'neilsen', *extra), 0)
            near(sim_manhattan('nelson', 'neilsen', *extra), partial_sim)

        # supplied q-gram tests
        for src, tar, expected in (('', '', 1),
                                   ('nelson', '', 0),
                                   ('', 'neilsen', 0)):
            eq(sim_manhattan(QGrams(src), QGrams(tar)), expected)
        near(sim_manhattan(QGrams('nelson'), QGrams('neilsen')), partial_sim)

        # non-q-gram tests
        for src, tar, expected in (('', '', 1),
                                   ('the quick', '', 0),
                                   ('', 'the quick', 0)):
            eq(sim_manhattan(src, tar, 0), expected)
        for src, tar in ((NONQ_FROM, NONQ_TO), (NONQ_TO, NONQ_FROM)):
            near(sim_manhattan(src, tar, 0), 1/2)

    def test_dist_manhattan(self):
        """Verify abydos.distance.dist_manhattan."""
        eq = self.assertEqual
        near = self.assertAlmostEqual
        partial_dist = 7/15

        # plain strings: default third argument first, then an explicit 2
        for extra in ((), (2,)):
            eq(dist_manhattan('', '', *extra), 0)
            eq(dist_manhattan('nelson', '', *extra), 1)
            eq(dist_manhattan('', 'neilsen', *extra), 1)
            near(dist_manhattan('nelson', 'neilsen', *extra), partial_dist)

        # supplied q-gram tests
        for src, tar, expected in (('', '', 0),
                                   ('nelson', '', 1),
                                   ('', 'neilsen', 1)):
            eq(dist_manhattan(QGrams(src), QGrams(tar)), expected)
        near(dist_manhattan(QGrams('nelson'), QGrams('neilsen')),
             partial_dist)

        # non-q-gram tests
        for src, tar, expected in (('', '', 0),
                                   ('the quick', '', 1),
                                   ('', 'the quick', 1)):
            eq(dist_manhattan(src, tar, 0), expected)
        for src, tar in ((NONQ_FROM, NONQ_TO), (NONQ_TO, NONQ_FROM)):
            near(dist_manhattan(src, tar, 0), 1/2)
902
903
904
class EuclideanTestCases(unittest.TestCase):
    """Test cases for the Euclidean functions.

    Covers abydos.distance.euclidean, sim_euclidean & .dist_euclidean
    """

    def test_euclidean(self):
        """Verify abydos.distance.euclidean."""
        eq = self.assertEqual
        near = self.assertAlmostEqual
        # expected values are written as x**0.5 because several of them are
        # compared for exact equality
        root2 = 2**0.5
        root7 = 7**0.5
        root8 = 8**0.5

        # plain strings: default third argument first, then an explicit 2
        for extra in ((), (2,)):
            eq(euclidean('', '', *extra), 0)
            eq(euclidean('nelson', '', *extra), root7)
            eq(euclidean('', 'neilsen', *extra), root8)
            near(euclidean('nelson', 'neilsen', *extra), root7)

        # supplied q-gram tests
        for src, tar, expected in (('', '', 0),
                                   ('nelson', '', root7),
                                   ('', 'neilsen', root8)):
            eq(euclidean(QGrams(src), QGrams(tar)), expected)
        near(euclidean(QGrams('nelson'), QGrams('neilsen')), root7)

        # non-q-gram tests
        for src, tar, expected in (('', '', 0),
                                   ('the quick', '', root2),
                                   ('', 'the quick', root2)):
            eq(euclidean(src, tar, 0), expected)
        for src, tar in ((NONQ_FROM, NONQ_TO), (NONQ_TO, NONQ_FROM)):
            near(euclidean(src, tar, 0), root8)

    def test_sim_euclidean(self):
        """Verify abydos.distance.sim_euclidean."""
        eq = self.assertEqual
        near = self.assertAlmostEqual
        partial_sim = 1-7**0.5/23**0.5

        # plain strings: default third argument first, then an explicit 2
        for extra in ((), (2,)):
            eq(sim_euclidean('', '', *extra), 1)
            eq(sim_euclidean('nelson', '', *extra), 0)
            eq(sim_euclidean('', 'neilsen', *extra), 0)
            near(sim_euclidean('nelson', 'neilsen', *extra), partial_sim)

        # supplied q-gram tests
        for src, tar, expected in (('', '', 1),
                                   ('nelson', '', 0),
                                   ('', 'neilsen', 0)):
            eq(sim_euclidean(QGrams(src), QGrams(tar)), expected)
        near(sim_euclidean(QGrams('nelson'), QGrams('neilsen')), partial_sim)

        # non-q-gram tests
        for src, tar, expected in (('', '', 1),
                                   ('the quick', '', 0),
                                   ('', 'the quick', 0)):
            eq(sim_euclidean(src, tar, 0), expected)
        for src, tar in ((NONQ_FROM, NONQ_TO), (NONQ_TO, NONQ_FROM)):
            near(sim_euclidean(src, tar, 0), 1-8**0.5/24**0.5)

    def test_dist_euclidean(self):
        """Verify abydos.distance.dist_euclidean."""
        eq = self.assertEqual
        near = self.assertAlmostEqual
        partial_dist = 7**0.5 / 23**0.5

        # plain strings: default third argument first, then an explicit 2
        for extra in ((), (2,)):
            eq(dist_euclidean('', '', *extra), 0)
            eq(dist_euclidean('nelson', '', *extra), 1)
            eq(dist_euclidean('', 'neilsen', *extra), 1)
            near(dist_euclidean('nelson', 'neilsen', *extra), partial_dist)

        # supplied q-gram tests
        for src, tar, expected in (('', '', 0),
                                   ('nelson', '', 1),
                                   ('', 'neilsen', 1)):
            eq(dist_euclidean(QGrams(src), QGrams(tar)), expected)
        near(dist_euclidean(QGrams('nelson'), QGrams('neilsen')),
             partial_dist)

        # non-q-gram tests
        for src, tar, expected in (('', '', 0),
                                   ('the quick', '', 1),
                                   ('', 'the quick', 1)):
            eq(dist_euclidean(src, tar, 0), expected)
        for src, tar in ((NONQ_FROM, NONQ_TO), (NONQ_TO, NONQ_FROM)):
            near(dist_euclidean(src, tar, 0), 8**0.5/24**0.5)
997
998
999
class ChebyshevTestCases(unittest.TestCase):
    """Test cases for the Chebyshev functions.

    Covers abydos.distance.chebyshev, sim_chebyshev & .dist_chebyshev
    """

    def test_chebyshev(self):
        """Verify abydos.distance.chebyshev."""
        eq = self.assertEqual
        near = self.assertAlmostEqual

        # plain strings: default third argument first, then an explicit 2;
        # the nelson/neilsen pair is compared exactly in the first pass and
        # approximately in the second
        for extra, check in (((), eq), ((2,), near)):
            eq(chebyshev('', '', *extra), 0)
            eq(chebyshev('nelson', '', *extra), 1)
            eq(chebyshev('', 'neilsen', *extra), 1)
            check(chebyshev('nelson', 'neilsen', *extra), 1)

        # supplied q-gram tests
        for src, tar, expected in (('', '', 0),
                                   ('nelson', '', 1),
                                   ('', 'neilsen', 1)):
            eq(chebyshev(QGrams(src), QGrams(tar)), expected)
        near(chebyshev(QGrams('nelson'), QGrams('neilsen')), 1)

        # non-q-gram tests
        for src, tar, expected in (('', '', 0),
                                   ('the quick', '', 1),
                                   ('', 'the quick', 1)):
            eq(chebyshev(src, tar, 0), expected)
        for src, tar in ((NONQ_FROM, NONQ_TO), (NONQ_TO, NONQ_FROM)):
            near(chebyshev(src, tar, 0), 1)

    def test_sim_chebyshev(self):
        """Verify abydos.distance.sim_chebyshev."""
        eq = self.assertEqual
        near = self.assertAlmostEqual

        # plain strings: default third argument first, then an explicit 2;
        # the nelson/neilsen pair is compared exactly in the first pass and
        # approximately in the second
        for extra, check in (((), eq), ((2,), near)):
            eq(sim_chebyshev('', '', *extra), 1)
            eq(sim_chebyshev('nelson', '', *extra), 0)
            eq(sim_chebyshev('', 'neilsen', *extra), 0)
            check(sim_chebyshev('nelson', 'neilsen', *extra), 0)

        # supplied q-gram tests
        for src, tar, expected in (('', '', 1),
                                   ('nelson', '', 0),
                                   ('', 'neilsen', 0)):
            eq(sim_chebyshev(QGrams(src), QGrams(tar)), expected)
        near(sim_chebyshev(QGrams('nelson'), QGrams('neilsen')), 0)

        # non-q-gram tests
        for src, tar, expected in (('', '', 1),
                                   ('the quick', '', 0),
                                   ('', 'the quick', 0)):
            eq(sim_chebyshev(src, tar, 0), expected)
        for src, tar in ((NONQ_FROM, NONQ_TO), (NONQ_TO, NONQ_FROM)):
            near(sim_chebyshev(src, tar, 0), 0)

    def test_dist_chebyshev(self):
        """Verify abydos.distance.dist_chebyshev."""
        eq = self.assertEqual
        near = self.assertAlmostEqual

        # plain strings: default third argument first, then an explicit 2;
        # the nelson/neilsen pair is compared exactly in the first pass and
        # approximately in the second
        for extra, check in (((), eq), ((2,), near)):
            eq(dist_chebyshev('', '', *extra), 0)
            eq(dist_chebyshev('nelson', '', *extra), 1)
            eq(dist_chebyshev('', 'neilsen', *extra), 1)
            check(dist_chebyshev('nelson', 'neilsen', *extra), 1)

        # supplied q-gram tests
        for src, tar, expected in (('', '', 0),
                                   ('nelson', '', 1),
                                   ('', 'neilsen', 1)):
            eq(dist_chebyshev(QGrams(src), QGrams(tar)), expected)
        near(dist_chebyshev(QGrams('nelson'), QGrams('neilsen')), 1)

        # non-q-gram tests
        for src, tar, expected in (('', '', 0),
                                   ('the quick', '', 1),
                                   ('', 'the quick', 1)):
            eq(dist_chebyshev(src, tar, 0), expected)
        for src, tar in ((NONQ_FROM, NONQ_TO), (NONQ_TO, NONQ_FROM)):
            near(dist_chebyshev(src, tar, 0), 1)
1082
1083
1084
class CosineSimilarityTestCases(unittest.TestCase):
    """Test cases for the cosine similarity functions.

    Covers abydos.distance.sim_cosine & .dist_cosine
    """

    def test_sim_cosine(self):
        """Verify abydos.distance.sim_cosine."""
        eq = self.assertEqual
        near = self.assertAlmostEqual
        partial_sim = 4/math.sqrt(7*8)

        # plain strings: default third argument first, then an explicit 2
        for extra in ((), (2,)):
            eq(sim_cosine('', '', *extra), 1)
            eq(sim_cosine('nelson', '', *extra), 0)
            eq(sim_cosine('', 'neilsen', *extra), 0)
            near(sim_cosine('nelson', 'neilsen', *extra), partial_sim)

        # supplied q-gram tests
        for src, tar, expected in (('', '', 1),
                                   ('nelson', '', 0),
                                   ('', 'neilsen', 0)):
            eq(sim_cosine(QGrams(src), QGrams(tar)), expected)
        near(sim_cosine(QGrams('nelson'), QGrams('neilsen')), partial_sim)

        # non-q-gram tests
        for src, tar, expected in (('', '', 1),
                                   ('the quick', '', 0),
                                   ('', 'the quick', 0)):
            eq(sim_cosine(src, tar, 0), expected)
        for src, tar in ((NONQ_FROM, NONQ_TO), (NONQ_TO, NONQ_FROM)):
            near(sim_cosine(src, tar, 0), 4/math.sqrt(9*7))

    def test_dist_cosine(self):
        """Verify abydos.distance.dist_cosine."""
        eq = self.assertEqual
        near = self.assertAlmostEqual
        partial_dist = 1-(4/math.sqrt(7*8))

        # plain strings: default third argument first, then an explicit 2
        for extra in ((), (2,)):
            eq(dist_cosine('', '', *extra), 0)
            eq(dist_cosine('nelson', '', *extra), 1)
            eq(dist_cosine('', 'neilsen', *extra), 1)
            near(dist_cosine('nelson', 'neilsen', *extra), partial_dist)

        # supplied q-gram tests
        for src, tar, expected in (('', '', 0),
                                   ('nelson', '', 1),
                                   ('', 'neilsen', 1)):
            eq(dist_cosine(QGrams(src), QGrams(tar)), expected)
        near(dist_cosine(QGrams('nelson'), QGrams('neilsen')), partial_dist)

        # non-q-gram tests
        for src, tar, expected in (('', '', 0),
                                   ('the quick', '', 1),
                                   ('', 'the quick', 1)):
            eq(dist_cosine(src, tar, 0), expected)
        for src, tar in ((NONQ_FROM, NONQ_TO), (NONQ_TO, NONQ_FROM)):
            near(dist_cosine(src, tar, 0), 1-4/math.sqrt(9*7))
1150
1151
1152
class JaroWinklerTestCases(unittest.TestCase):
    """Test cases for the Jaro(-Winkler) functions.

    Covers abydos.distance.sim_strcmp95, .dist_strcmp95, .sim_jaro_winkler,
    & .dist_jaro_winkler
    """

    def test_sim_strcmp95(self):
        """Verify abydos.distance.sim_strcmp95."""
        eq = self.assertEqual
        near = self.assertAlmostEqual

        for src, tar, expected in (('', '', 1),
                                   ('MARTHA', '', 0),
                                   ('', 'MARTHA', 0),
                                   ('MARTHA', 'MARTHA', 1)):
            eq(sim_strcmp95(src, tar), expected)

        for src, tar, expected in (('MARTHA', 'MARHTA', 0.96111111),
                                   ('DWAYNE', 'DUANE', 0.873),
                                   ('DIXON', 'DICKSONX', 0.839333333),
                                   ('ABCD', 'EFGH', 0.0)):
            near(sim_strcmp95(src, tar), expected)

        # long_strings = True
        for src, tar, expected in (('DIXON', 'DICKSONX', 0.85393939),
                                   ('DWAYNE', 'DUANE', 0.89609090),
                                   ('MARTHA', 'MARHTA', 0.97083333)):
            near(sim_strcmp95(src, tar, True), expected)

        # cover case where we don't boost, etc.
        for args, expected in ((('A', 'ABCDEFGHIJK'), 69/99),
                               (('A', 'ABCDEFGHIJK', True), 69/99),
                               (('d', 'abcdefgh'), 0.708333333),
                               (('d', 'abcdefgh', True), 0.708333333),
                               (('1', 'abc1efgh', True), 0.708333333),
                               (('12hundredths', '12hundred', True),
                                0.916666667)):
            near(sim_strcmp95(*args), expected)

    def test_dist_strcmp95(self):
        """Verify abydos.distance.dist_strcmp95."""
        eq = self.assertEqual
        near = self.assertAlmostEqual

        for src, tar, expected in (('', '', 0),
                                   ('MARTHA', '', 1),
                                   ('', 'MARTHA', 1),
                                   ('MARTHA', 'MARTHA', 0)):
            eq(dist_strcmp95(src, tar), expected)

        for src, tar, expected in (('MARTHA', 'MARHTA', 0.03888888),
                                   ('DWAYNE', 'DUANE', 0.127),
                                   ('DIXON', 'DICKSONX', 0.160666666),
                                   ('ABCD', 'EFGH', 1.0)):
            near(dist_strcmp95(src, tar), expected)

    def test_sim_jaro_winkler(self):
        """Verify abydos.distance.sim_jaro_winkler."""
        eq = self.assertEqual
        near = self.assertAlmostEqual

        # each pair is checked in 'jaro' mode, then in 'winkler' mode
        for src, tar, expected in (('', '', 1),
                                   ('MARTHA', '', 0),
                                   ('', 'MARHTA', 0),
                                   ('MARTHA', 'MARTHA', 1)):
            for mode in ('jaro', 'winkler'):
                eq(sim_jaro_winkler(src, tar, mode=mode), expected)

        # https://en.wikipedia.org/wiki/Jaro-Winkler_distance
        for src, tar, jaro_sim, winkler_sim in (
                ('MARTHA', 'MARHTA', 0.94444444, 0.96111111),
                ('DWAYNE', 'DUANE', 0.82222222, 0.84),
                ('DIXON', 'DICKSONX', 0.76666666, 0.81333333)):
            near(sim_jaro_winkler(src, tar, mode='jaro'), jaro_sim)
            near(sim_jaro_winkler(src, tar, mode='winkler'), winkler_sim)

        # each of these keyword values is expected to raise ValueError
        for bad_kwargs in (dict(boost_threshold=2),
                           dict(boost_threshold=-1),
                           dict(scaling_factor=0.3),
                           dict(scaling_factor=-1)):
            self.assertRaises(ValueError, sim_jaro_winkler, 'abcd', 'dcba',
                              **bad_kwargs)

        near(sim_jaro_winkler('ABCD', 'EFGH'), 0.0)

        # long_strings = True (applies only to Jaro-Winkler, not Jaro)
        eq(sim_jaro_winkler('ABCD', 'EFGH', long_strings=True),
           sim_jaro_winkler('ABCD', 'EFGH'))
        eq(sim_jaro_winkler('DIXON', 'DICKSONX', mode='jaro',
                            long_strings=True),
           sim_jaro_winkler('DIXON', 'DICKSONX', mode='jaro'))
        for src, tar, expected in (('DIXON', 'DICKSONX', 0.83030303),
                                   ('MARTHA', 'MARHTA', 0.97083333)):
            near(sim_jaro_winkler(src, tar, mode='winkler',
                                  long_strings=True), expected)

    def test_dist_jaro_winkler(self):
        """Verify abydos.distance.dist_jaro_winkler."""
        eq = self.assertEqual
        near = self.assertAlmostEqual

        # each pair is checked in 'jaro' mode, then in 'winkler' mode
        for src, tar, expected in (('', '', 0),
                                   ('MARTHA', '', 1),
                                   ('', 'MARHTA', 1),
                                   ('MARTHA', 'MARTHA', 0)):
            for mode in ('jaro', 'winkler'):
                eq(dist_jaro_winkler(src, tar, mode=mode), expected)

        # https://en.wikipedia.org/wiki/Jaro-Winkler_distance
        for src, tar, jaro_dist, winkler_dist in (
                ('MARTHA', 'MARHTA', 0.05555555, 0.03888888),
                ('DWAYNE', 'DUANE', 0.17777777, 0.16),
                ('DIXON', 'DICKSONX', 0.23333333, 0.18666666)):
            near(dist_jaro_winkler(src, tar, mode='jaro'), jaro_dist)
            near(dist_jaro_winkler(src, tar, mode='winkler'), winkler_dist)

        # each of these keyword values is expected to raise ValueError
        for bad_kwargs in (dict(boost_threshold=2),
                           dict(boost_threshold=-1),
                           dict(scaling_factor=0.3),
                           dict(scaling_factor=-1)):
            self.assertRaises(ValueError, dist_jaro_winkler, 'abcd', 'dcba',
                              **bad_kwargs)

        near(dist_jaro_winkler('ABCD', 'EFGH'), 1.0)
1291
1292
1293
class LcsseqTestCases(unittest.TestCase):
    """Exercise the longest-common-subsequence functions.

    Covers abydos.distance.lcsseq, .sim_lcsseq, & .dist_lcsseq
    """

    def test_lcsseq(self):
        """Check the subsequence returned by abydos.distance.lcsseq."""
        expectations = (
            ('', '', ''),
            ('A', '', ''),
            ('', 'A', ''),
            ('A', 'A', 'A'),
            ('ABCD', '', ''),
            ('', 'ABCD', ''),
            ('ABCD', 'ABCD', 'ABCD'),
            ('ABCD', 'BC', 'BC'),
            ('ABCD', 'AD', 'AD'),
            ('ABCD', 'AC', 'AC'),
            ('AB', 'CD', ''),
            ('ABC', 'BCD', 'BC'),

            ('DIXON', 'DICKSONX', 'DION'),

            # https://en.wikipedia.org/wiki/Longest_common_subsequence_problem
            ('AGCAT', 'GAC', 'AC'),
            ('XMJYAUZ', 'MZJAWXU', 'MJAU'),

            # https://github.com/jwmerrill/factor/blob/master/basis/lcs/lcs-tests.factor
            ('hell', 'hello', 'hell'),
            ('hello', 'hell', 'hell'),
            ('ell', 'hell', 'ell'),
            ('hell', 'ell', 'ell'),
            ('faxbcd', 'abdef', 'abd'),

            # http://www.unesco.org/culture/languages-atlas/assets/_core/php/qcubed_unit_tests.php
            ('hello world', 'world war 2', 'world'),
            ('foo bar', 'bar foo', 'foo'),
            ('aaa', 'aa', 'aa'),
            ('cc', 'bbbbcccccc', 'cc'),
            ('ccc', 'bcbb', 'c'),
        )
        for src, tar, expected in expectations:
            self.assertEqual(lcsseq(src, tar), expected)

    def test_sim_lcsseq(self):
        """Check the similarity returned by abydos.distance.sim_lcsseq."""
        # These cases are compared with exact equality.
        exact_cases = (
            ('', '', 1),
            ('A', '', 0),
            ('', 'A', 0),
            ('A', 'A', 1),
            ('ABCD', '', 0),
            ('', 'ABCD', 0),
            ('ABCD', 'ABCD', 1),
        )
        # These cases are compared with assertAlmostEqual.
        approx_cases = (
            ('ABCD', 'BC', 2/4),
            ('ABCD', 'AD', 2/4),
            ('ABCD', 'AC', 2/4),
            ('AB', 'CD', 0),
            ('ABC', 'BCD', 2/3),

            ('DIXON', 'DICKSONX', 4/8),

            # https://en.wikipedia.org/wiki/Longest_common_subsequence_problem
            ('AGCAT', 'GAC', 2/5),
            ('XMJYAUZ', 'MZJAWXU', 4/7),

            # https://github.com/jwmerrill/factor/blob/master/basis/lcs/lcs-tests.factor
            ('hell', 'hello', 4/5),
            ('hello', 'hell', 4/5),
            ('ell', 'hell', 3/4),
            ('hell', 'ell', 3/4),
            ('faxbcd', 'abdef', 3/6),

            # http://www.unesco.org/culture/languages-atlas/assets/_core/php/qcubed_unit_tests.php
            ('hello world', 'world war 2', 5/11),
            ('foo bar', 'bar foo', 3/7),
            ('aaa', 'aa', 2/3),
            ('cc', 'bbbbcccccc', 2/10),
            ('ccc', 'bcbb', 1/4),
        )
        for src, tar, expected in exact_cases:
            self.assertEqual(sim_lcsseq(src, tar), expected)
        for src, tar, expected in approx_cases:
            self.assertAlmostEqual(sim_lcsseq(src, tar), expected)

    def test_dist_lcsseq(self):
        """Check the distance returned by abydos.distance.dist_lcsseq."""
        # These cases are compared with exact equality.
        exact_cases = (
            ('', '', 0),
            ('A', '', 1),
            ('', 'A', 1),
            ('A', 'A', 0),
            ('ABCD', '', 1),
            ('', 'ABCD', 1),
            ('ABCD', 'ABCD', 0),
        )
        # These cases are compared with assertAlmostEqual.
        approx_cases = (
            ('ABCD', 'BC', 2/4),
            ('ABCD', 'AD', 2/4),
            ('ABCD', 'AC', 2/4),
            ('AB', 'CD', 1),
            ('ABC', 'BCD', 1/3),

            ('DIXON', 'DICKSONX', 4/8),

            # https://en.wikipedia.org/wiki/Longest_common_subsequence_problem
            ('AGCAT', 'GAC', 3/5),
            ('XMJYAUZ', 'MZJAWXU', 3/7),

            # https://github.com/jwmerrill/factor/blob/master/basis/lcs/lcs-tests.factor
            ('hell', 'hello', 1/5),
            ('hello', 'hell', 1/5),
            ('ell', 'hell', 1/4),
            ('hell', 'ell', 1/4),
            ('faxbcd', 'abdef', 3/6),

            # http://www.unesco.org/culture/languages-atlas/assets/_core/php/qcubed_unit_tests.php
            ('hello world', 'world war 2', 6/11),
            ('foo bar', 'bar foo', 4/7),
            ('aaa', 'aa', 1/3),
            ('cc', 'bbbbcccccc', 8/10),
            ('ccc', 'bcbb', 3/4),
        )
        for src, tar, expected in exact_cases:
            self.assertEqual(dist_lcsseq(src, tar), expected)
        for src, tar, expected in approx_cases:
            self.assertAlmostEqual(dist_lcsseq(src, tar), expected)
1403
1404
1405
class LcsstrTestCases(unittest.TestCase):
    """Exercise the longest-common-substring functions.

    Covers abydos.distance.lcsstr, .sim_lcsstr, & .dist_lcsstr
    """

    def test_lcsstr(self):
        """Check the substring returned by abydos.distance.lcsstr."""
        expectations = (
            ('', '', ''),
            ('A', '', ''),
            ('', 'A', ''),
            ('A', 'A', 'A'),
            ('ABCD', '', ''),
            ('', 'ABCD', ''),
            ('ABCD', 'ABCD', 'ABCD'),
            ('ABCD', 'BC', 'BC'),
            ('ABCD', 'AD', 'A'),
            ('ABCD', 'AC', 'A'),
            ('AB', 'CD', ''),
            ('ABC', 'BCD', 'BC'),

            ('DIXON', 'DICKSONX', 'DI'),

            # https://en.wikipedia.org/wiki/Longest_common_subsequence_problem
            ('AGCAT', 'GAC', 'A'),
            ('XMJYAUZ', 'MZJAWXU', 'X'),

            # https://github.com/jwmerrill/factor/blob/master/basis/lcs/lcs-tests.factor
            ('hell', 'hello', 'hell'),
            ('hello', 'hell', 'hell'),
            ('ell', 'hell', 'ell'),
            ('hell', 'ell', 'ell'),
            ('faxbcd', 'abdef', 'f'),

            # http://www.unesco.org/culture/languages-atlas/assets/_core/php/qcubed_unit_tests.php
            ('hello world', 'world war 2', 'world'),
            ('foo bar', 'bar foo', 'foo'),
            ('aaa', 'aa', 'aa'),
            ('cc', 'bbbbcccccc', 'cc'),
            ('ccc', 'bcbb', 'c'),

            # http://www.maplesoft.com/support/help/Maple/view.aspx?path=StringTools/LongestCommonSubString
            ('abax', 'bax', 'bax'),
            ('tsaxbaxyz', 'axcaxy', 'axy'),
            ('abcde', 'uvabxycde', 'cde'),
            ('abc', 'xyz', ''),
        )
        for src, tar, expected in expectations:
            self.assertEqual(lcsstr(src, tar), expected)

        # Long nucleotide strings, assembled from adjacent literals.
        long_src = ('TAAGGTCGGCGCGCACGCTGGCGAGTATGGTGCGGAGGCCCTGGA'
                    'GAGGTGAGGCTCCCTCCCCTGCTCCGACCCGGGCTCCTCGCCCGCCCGGACCCAC')
        long_tar = ('AAGCGCCGCGCAGTCTGGG'
                    'CTCCGCACACTTCTGGTCCAGTCCGACTGAGAAGGAACC'
                    'ACCATGGTGCTGTCTCCCGCTGACAAGACCAACATCAAG'
                    'ACTGCCTGGGAAAAGATCGGCAGCCACGGTGGCGAGTATGGCGCCGAGGCCGT')
        self.assertEqual(lcsstr(long_src, long_tar), 'TGGCGAGTATGG')

    def test_sim_lcsstr(self):
        """Check the similarity returned by abydos.distance.sim_lcsstr."""
        # These cases are compared with exact equality.
        exact_cases = (
            ('', '', 1),
            ('A', '', 0),
            ('', 'A', 0),
            ('A', 'A', 1),
            ('ABCD', '', 0),
            ('', 'ABCD', 0),
            ('ABCD', 'ABCD', 1),
        )
        # These cases are compared with assertAlmostEqual.
        approx_cases = (
            ('ABCD', 'BC', 2/4),
            ('ABCD', 'AD', 1/4),
            ('ABCD', 'AC', 1/4),
            ('AB', 'CD', 0),
            ('ABC', 'BCD', 2/3),

            ('DIXON', 'DICKSONX', 2/8),

            # https://en.wikipedia.org/wiki/Longest_common_subsequence_problem
            ('AGCAT', 'GAC', 1/5),
            ('XMJYAUZ', 'MZJAWXU', 1/7),

            # https://github.com/jwmerrill/factor/blob/master/basis/lcs/lcs-tests.factor
            ('hell', 'hello', 4/5),
            ('hello', 'hell', 4/5),
            ('ell', 'hell', 3/4),
            ('hell', 'ell', 3/4),
            ('faxbcd', 'abdef', 1/6),

            # http://www.unesco.org/culture/languages-atlas/assets/_core/php/qcubed_unit_tests.php
            ('hello world', 'world war 2', 5/11),
            ('foo bar', 'bar foo', 3/7),
            ('aaa', 'aa', 2/3),
            ('cc', 'bbbbcccccc', 2/10),
            ('ccc', 'bcbb', 1/4),
        )
        for src, tar, expected in exact_cases:
            self.assertEqual(sim_lcsstr(src, tar), expected)
        for src, tar, expected in approx_cases:
            self.assertAlmostEqual(sim_lcsstr(src, tar), expected)

    def test_dist_lcsstr(self):
        """Check the distance returned by abydos.distance.dist_lcsstr."""
        # These cases are compared with exact equality.
        exact_cases = (
            ('', '', 0),
            ('A', '', 1),
            ('', 'A', 1),
            ('A', 'A', 0),
            ('ABCD', '', 1),
            ('', 'ABCD', 1),
            ('ABCD', 'ABCD', 0),
        )
        # These cases are compared with assertAlmostEqual.
        approx_cases = (
            ('ABCD', 'BC', 2/4),
            ('ABCD', 'AD', 3/4),
            ('ABCD', 'AC', 3/4),
            ('AB', 'CD', 1),
            ('ABC', 'BCD', 1/3),

            ('DIXON', 'DICKSONX', 6/8),

            # https://en.wikipedia.org/wiki/Longest_common_subsequence_problem
            ('AGCAT', 'GAC', 4/5),
            ('XMJYAUZ', 'MZJAWXU', 6/7),

            # https://github.com/jwmerrill/factor/blob/master/basis/lcs/lcs-tests.factor
            ('hell', 'hello', 1/5),
            ('hello', 'hell', 1/5),
            ('ell', 'hell', 1/4),
            ('hell', 'ell', 1/4),
            ('faxbcd', 'abdef', 5/6),

            # http://www.unesco.org/culture/languages-atlas/assets/_core/php/qcubed_unit_tests.php
            ('hello world', 'world war 2', 6/11),
            ('foo bar', 'bar foo', 4/7),
            ('aaa', 'aa', 1/3),
            ('cc', 'bbbbcccccc', 8/10),
            ('ccc', 'bcbb', 3/4),
        )
        for src, tar, expected in exact_cases:
            self.assertEqual(dist_lcsstr(src, tar), expected)
        for src, tar, expected in approx_cases:
            self.assertAlmostEqual(dist_lcsstr(src, tar), expected)
1525
1526
1527
class RatcliffObershelpTestCases(unittest.TestCase):
    """Test Ratcliff-Obershelp functions.

    abydos.distance.sim_ratcliff_obershelp, &
    abydos.distance.dist_ratcliff_obershelp
    """

    def test_sim_ratcliff_obershelp(self):
        """Test abydos.distance.sim_ratcliff_obershelp.

        Fixed expectations are checked first; afterwards every word pair read
        from two corpus files is compared against the ratio computed by
        difflib.SequenceMatcher.
        """
        # https://github.com/rockymadden/stringmetric/blob/master/core/src/test/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetricSpec.scala
        self.assertEqual(sim_ratcliff_obershelp('', ''), 1)
        self.assertEqual(sim_ratcliff_obershelp('abc', ''), 0)
        self.assertEqual(sim_ratcliff_obershelp('', 'xyz'), 0)
        self.assertEqual(sim_ratcliff_obershelp('abc', 'abc'), 1)
        self.assertEqual(sim_ratcliff_obershelp('123', '123'), 1)
        self.assertEqual(sim_ratcliff_obershelp('abc', 'xyz'), 0)
        self.assertEqual(sim_ratcliff_obershelp('123', '456'), 0)

        # Each pair is checked in both argument orders.
        for first, second, expected in (
                ('aleksander', 'alexandre', 0.7368421052631579),
                ('pennsylvania', 'pencilvaneya', 0.6666666666666666),
                ('abcefglmn', 'abefglmo', 0.8235294117647058),
        ):
            self.assertAlmostEqual(sim_ratcliff_obershelp(first, second),
                                   expected)
            self.assertAlmostEqual(sim_ratcliff_obershelp(second, first),
                                   expected)

        with open(TESTDIR+'/corpora/variantNames.csv') as cav_testset:
            # Discard the first line (presumably a header row).
            next(cav_testset)
            for line in cav_testset:
                fields = line.strip().split(',')
                # Columns 0 and 4 hold the two words that are compared.
                word1, word2 = fields[0], fields[4]
                self.assertAlmostEqual(sim_ratcliff_obershelp(word1, word2),
                                       SequenceMatcher(None, word1,
                                                       word2).ratio())

        # Only upper-case ASCII letters and the separating comma are kept;
        # the set is built once instead of once per character.
        allowed = frozenset('ABCDEFGHIJKLMNOPQRSTUVWXYZ,')
        with open(TESTDIR+'/corpora/wikipediaCommonMisspellings.csv') as missp:
            # Discard the first line (presumably a header row).
            next(missp)
            for line in missp:
                cleaned = ''.join(char for char in line.strip().upper()
                                  if char in allowed)
                word1, word2 = cleaned.split(',')
                self.assertAlmostEqual(sim_ratcliff_obershelp(word1, word2),
                                       SequenceMatcher(None, word1,
                                                       word2).ratio())

    def test_dist_ratcliff_obershelp(self):
        """Test abydos.distance.dist_ratcliff_obershelp."""
        # https://github.com/rockymadden/stringmetric/blob/master/core/src/test/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetricSpec.scala
        self.assertEqual(dist_ratcliff_obershelp('', ''), 0)
        self.assertEqual(dist_ratcliff_obershelp('abc', ''), 1)
        self.assertEqual(dist_ratcliff_obershelp('', 'xyz'), 1)
        self.assertEqual(dist_ratcliff_obershelp('abc', 'abc'), 0)
        self.assertEqual(dist_ratcliff_obershelp('123', '123'), 0)
        self.assertEqual(dist_ratcliff_obershelp('abc', 'xyz'), 1)
        self.assertEqual(dist_ratcliff_obershelp('123', '456'), 1)

        # Each pair is checked in both argument orders.
        for first, second, expected in (
                ('aleksander', 'alexandre', 0.2631578947368421),
                ('pennsylvania', 'pencilvaneya', 0.3333333333333333),
                ('abcefglmn', 'abefglmo', 0.1764705882352941),
        ):
            self.assertAlmostEqual(dist_ratcliff_obershelp(first, second),
                                   expected)
            self.assertAlmostEqual(dist_ratcliff_obershelp(second, first),
                                   expected)
1610
1611
1612
class MraTestCases(unittest.TestCase):
    """Exercise the MRA comparison functions.

    Covers abydos.distance.mra_compare, .sim_mra & .dist_mra
    """

    def test_mra_compare(self):
        """Check the integer score returned by abydos.distance.mra_compare."""
        expectations = (
            ('', '', 6),
            ('a', 'a', 6),
            ('abcdefg', 'abcdefg', 6),
            ('abcdefg', '', 0),
            ('', 'abcdefg', 0),

            # https://en.wikipedia.org/wiki/Match_rating_approach
            ('Byrne', 'Boern', 5),
            ('Smith', 'Smyth', 5),
            ('Catherine', 'Kathryn', 4),

            ('ab', 'abcdefgh', 0),
            ('ab', 'ac', 5),
            ('abcdefik', 'abcdefgh', 3),
            ('xyz', 'abc', 0),
        )
        for src, tar, expected in expectations:
            self.assertEqual(mra_compare(src, tar), expected)

    def test_sim_mra(self):
        """Check the similarity returned by abydos.distance.sim_mra."""
        expectations = (
            ('', '', 1),
            ('a', 'a', 1),
            ('abcdefg', 'abcdefg', 1),
            ('abcdefg', '', 0),
            ('', 'abcdefg', 0),

            # https://en.wikipedia.org/wiki/Match_rating_approach
            ('Byrne', 'Boern', 5/6),
            ('Smith', 'Smyth', 5/6),
            ('Catherine', 'Kathryn', 4/6),

            ('ab', 'abcdefgh', 0),
            ('ab', 'ac', 5/6),
            ('abcdefik', 'abcdefgh', 3/6),
            ('xyz', 'abc', 0),
        )
        for src, tar, expected in expectations:
            self.assertEqual(sim_mra(src, tar), expected)

    def test_dist_mra(self):
        """Check the distance returned by abydos.distance.dist_mra."""
        # Each row names the assertion applied to it, so exact and
        # approximate comparisons keep their interleaved order.
        exact, approx = self.assertEqual, self.assertAlmostEqual
        expectations = (
            (exact, '', '', 0),
            (exact, 'a', 'a', 0),
            (exact, 'abcdefg', 'abcdefg', 0),
            (exact, 'abcdefg', '', 1),
            (exact, '', 'abcdefg', 1),

            # https://en.wikipedia.org/wiki/Match_rating_approach
            (approx, 'Byrne', 'Boern', 1/6),
            (approx, 'Smith', 'Smyth', 1/6),
            (approx, 'Catherine', 'Kathryn', 2/6),

            (exact, 'ab', 'abcdefgh', 1),
            (approx, 'ab', 'ac', 1/6),
            (approx, 'abcdefik', 'abcdefgh', 3/6),
            (exact, 'xyz', 'abc', 1),
        )
        for check, src, tar, expected in expectations:
            check(dist_mra(src, tar), expected)
1671
1672
1673
class CompressionTestCases(unittest.TestCase):
    """Test compression distance functions.

    abydos.distance.dist_compression & .sim_compression
    """

    # Value returned by ac_train for the space-joined NIALL names; passed as
    # the optional fourth argument in the 'arith' cases below.
    arith_dict = ac_train(' '.join(NIALL))

    def test_dist_compression(self):
        """Test abydos.distance.dist_compression."""
        # Extra positional arguments for every compressor variant exercised.
        plain_modes = ((), ('bzip2',), ('zlib',), ('arith',), ('rle',),
                       ('bwtrle',))
        all_modes = ((), ('bzip2',), ('zlib',), ('arith',),
                     ('arith', self.arith_dict), ('rle',), ('bwtrle',))

        for mode_args in all_modes:
            self.assertEqual(dist_compression('', '', *mode_args), 0)

        for mode_args in all_modes:
            self.assertGreater(dist_compression('a', '', *mode_args), 0)

        for mode_args in plain_modes:
            self.assertGreater(dist_compression('abcdefg', 'fg', *mode_args),
                               0)

    def test_dist_compression_arith(self):
        """Test abydos.distance.dist_compression (arithmetic compression)."""
        # Each pair is checked in both argument orders, first with the
        # class-level arith_dict and then without it.
        for first, second, with_dict, without_dict in (
                ('Niall', 'Neil', 0.608695652173913, 0.6875),
                ('Njáll', 'Njall', 0.714285714285714, 0.75),
        ):
            self.assertAlmostEqual(dist_compression(first, second, 'arith',
                                                    self.arith_dict),
                                   with_dict)
            self.assertAlmostEqual(dist_compression(second, first, 'arith',
                                                    self.arith_dict),
                                   with_dict)
            self.assertAlmostEqual(dist_compression(first, second, 'arith'),
                                   without_dict)
            self.assertAlmostEqual(dist_compression(second, first, 'arith'),
                                   without_dict)

    def test_dist_compression_rle(self):
        """Test abydos.distance.dist_compression (RLE & BWT+RLE)."""
        for src, tar, mode, expected in (
                ('abc', 'abc', 'rle', 0),
                ('abc', 'def', 'rle', 1),

                ('abc', 'abc', 'bwtrle', 0),
                ('abc', 'def', 'bwtrle', 0.75),

                ('aaa', 'bbaaa', 'rle', 0.5),
                ('abb', 'bbba', 'rle', 1/3),
                ('banana', 'banane', 'bwtrle', 0.57142857142),
                ('bananas', 'bananen', 'bwtrle', 0.5),
        ):
            self.assertAlmostEqual(dist_compression(src, tar, mode), expected)

    def test_sim_compression(self):
        """Test abydos.distance.sim_compression."""
        # Extra positional arguments for every compressor variant exercised.
        plain_modes = ((), ('bzip2',), ('zlib',), ('arith',), ('rle',),
                       ('bwtrle',))
        all_modes = ((), ('bzip2',), ('zlib',), ('arith',),
                     ('arith', self.arith_dict), ('rle',), ('bwtrle',))

        for mode_args in all_modes:
            self.assertEqual(sim_compression('', '', *mode_args), 1)

        for mode_args in all_modes:
            self.assertLess(sim_compression('a', '', *mode_args), 1)

        for mode_args in plain_modes:
            self.assertLess(sim_compression('abcdefg', 'fg', *mode_args), 1)

    def test_sim_compression_arith(self):
        """Test abydos.distance.sim_compression (arithmetic compression)."""
        # Each pair is checked in both argument orders, first with the
        # class-level arith_dict and then without it.
        for first, second, with_dict, without_dict in (
                ('Niall', 'Neil', 0.3913043478260869, 0.3125),
                ('Njáll', 'Njall', 0.285714285714285, 0.25),
        ):
            self.assertAlmostEqual(sim_compression(first, second, 'arith',
                                                   self.arith_dict),
                                   with_dict)
            self.assertAlmostEqual(sim_compression(second, first, 'arith',
                                                   self.arith_dict),
                                   with_dict)
            self.assertAlmostEqual(sim_compression(first, second, 'arith'),
                                   without_dict)
            self.assertAlmostEqual(sim_compression(second, first, 'arith'),
                                   without_dict)

    def test_sim_compression_rle(self):
        """Test abydos.distance.sim_compression (RLE & BWT+RLE)."""
        for src, tar, mode, expected in (
                ('abc', 'abc', 'rle', 1),
                ('abc', 'def', 'rle', 0),

                ('abc', 'abc', 'bwtrle', 1),
                ('abc', 'def', 'bwtrle', 0.25),

                ('aaa', 'bbaaa', 'rle', 0.5),
                ('abb', 'bbba', 'rle', 2/3),
                ('banana', 'banane', 'bwtrle', 0.42857142857),
                ('bananas', 'bananen', 'bwtrle', 0.5),
        ):
            self.assertAlmostEqual(sim_compression(src, tar, mode), expected)

    def test_lzma(self):
        """Test LZMA-related sim/dist functions.

        When an lzma module can be located, the 'lzma' mode is exercised and
        the module is then hidden from sys.modules so that the final check
        sees it as missing. The hidden entry is restored afterwards so this
        test does not leak modified interpreter state into later tests.
        """
        hidden_module = None
        # NOTE(review): pkgutil.find_loader is deprecated since Python 3.12;
        # it is kept because this file still carries Python 2 __future__
        # imports.
        if pkgutil.find_loader('lzma'):
            self.assertEqual(dist_compression('', '', 'lzma'), 0)
            self.assertGreater(dist_compression('a', '', 'lzma'), 0)
            self.assertGreater(dist_compression('abcdefg', 'fg', 'lzma'), 0)
            self.assertEqual(sim_compression('', '', 'lzma'), 1)
            self.assertLess(sim_compression('a', '', 'lzma'), 1)
            self.assertLess(sim_compression('abcdefg', 'fg', 'lzma'), 1)
            # Presumably dist_compression looks lzma up in sys.modules --
            # confirm against abydos.distance. pop() also tolerates the
            # entry being absent.
            hidden_module = sys.modules.pop('lzma', None)

        try:
            self.assertRaises(ValueError, dist_compression, 'a', '', 'lzma')
        finally:
            if hidden_module is not None:
                sys.modules['lzma'] = hidden_module
1821
1822
1823
class MongeElkanTestCases(unittest.TestCase):
    """Exercise the Monge-Elkan functions.

    Covers abydos.distance.sim_monge_elkan & .dist_monge_elkan
    """

    def test_sim_monge_elkan(self):
        """Check the similarity from abydos.distance.sim_monge_elkan."""
        for src, tar, expected in (
                ('', '', 1),
                ('', 'a', 0),
                ('a', 'a', 1),

                ('Niall', 'Neal', 3/4),
                ('Niall', 'Njall', 5/6),
                ('Niall', 'Niel', 3/4),
                ('Niall', 'Nigel', 3/4),
        ):
            self.assertEqual(sim_monge_elkan(src, tar), expected)

        # Symmetric variant: the first three are exact, the last approximate.
        for tar, expected in (
                ('Neal', 31/40),
                ('Njall', 5/6),
                ('Niel', 31/40),
        ):
            self.assertEqual(sim_monge_elkan('Niall', tar, symmetric=True),
                             expected)
        self.assertAlmostEqual(sim_monge_elkan('Niall', 'Nigel',
                                               symmetric=True), 17/24)

    def test_dist_monge_elkan(self):
        """Check the distance from abydos.distance.dist_monge_elkan."""
        # Each row names the assertion applied to it, so exact and
        # approximate comparisons keep their interleaved order.
        exact, approx = self.assertEqual, self.assertAlmostEqual
        for check, src, tar, expected in (
                (exact, '', '', 0),
                (exact, '', 'a', 1),

                (exact, 'Niall', 'Neal', 1/4),
                (approx, 'Niall', 'Njall', 1/6),
                (exact, 'Niall', 'Niel', 1/4),
                (exact, 'Niall', 'Nigel', 1/4),
        ):
            check(dist_monge_elkan(src, tar), expected)

        # Symmetric variant: every case is compared approximately.
        for tar, expected in (
                ('Neal', 9/40),
                ('Njall', 1/6),
                ('Niel', 9/40),
                ('Nigel', 7/24),
        ):
            self.assertAlmostEqual(dist_monge_elkan('Niall', tar,
                                                    symmetric=True),
                                   expected)
1867
1868
1869
class IdentityTestCases(unittest.TestCase):
    """Test identity similarity functions.

    abydos.distance.sim_ident & .dist_ident
    """

    def test_sim_ident(self):
        """Test abydos.distance.sim_ident."""
        # (src, tar, expected similarity)
        expectations = (
            ('', '', 1),
            ('', 'a', 0),
            ('a', '', 0),
            ('a', 'a', 1),
            ('abcd', 'abcd', 1),
            ('abcd', 'dcba', 0),
            ('abc', 'cba', 0),
        )
        for src, tar, score in expectations:
            self.assertEqual(sim_ident(src, tar), score)

    def test_dist_ident(self):
        """Test abydos.distance.dist_ident."""
        # (src, tar, expected distance)
        expectations = (
            ('', '', 0),
            ('', 'a', 1),
            ('a', '', 1),
            ('a', 'a', 0),
            ('abcd', 'abcd', 0),
            ('abcd', 'dcba', 1),
            ('abc', 'cba', 1),
        )
        for src, tar, score in expectations:
            self.assertEqual(dist_ident(src, tar), score)
1894
1895
1896
def _sim_wikipedia(src, tar):
    """Score a pair of DNA bases using the Wikipedia example matrix.

    The scoring table is copied from:
    https://en.wikipedia.org/wiki/Needleman%E2%80%93Wunsch_algorithm

    The table is handed to sim_matrix together with symmetric=True and
    the alphabet 'CGAT'; whatever sim_matrix returns is passed through.
    """
    # Same-base scores first, then one entry per mixed pair
    base_scores = dict((
        (('A', 'A'), 10),
        (('G', 'G'), 7),
        (('C', 'C'), 9),
        (('T', 'T'), 8),
        (('A', 'G'), -1),
        (('A', 'C'), -3),
        (('A', 'T'), -4),
        (('G', 'C'), -5),
        (('G', 'T'), -3),
        (('C', 'T'), 0),
    ))
    return sim_matrix(src, tar, base_scores, symmetric=True,
                      alphabet='CGAT')
1906
1907
1908
def _sim_nw(src, tar):
1909
    """Return 1 if src is tar, otherwise -1."""
1910
    return 2*float(src is tar)-1
1911
1912
1913
class MatrixSimTestCases(unittest.TestCase):
    """Test matrix similarity functions.

    abydos.distance.sim_matrix
    """

    def test_sim_matrix(self):
        """Test abydos.distance.sim_matrix."""
        # Called with only the two strings: (src, tar, expected)
        plain_cases = (
            ('', '', 1),
            ('', 'a', 0),
            ('a', '', 0),
            ('a', 'a', 1),
            ('abcd', 'abcd', 1),
            ('abcd', 'dcba', 0),
            ('abc', 'cba', 0),
        )
        for src, tar, score in plain_cases:
            self.assertEqual(sim_matrix(src, tar), score)

        # https://en.wikipedia.org/wiki/Needleman–Wunsch_algorithm
        wikipedia_cases = (
            ('A', 'C', -3),
            ('G', 'G', 7),
            ('A', 'A', 10),
            ('T', 'A', -4),
            ('T', 'C', 0),
            ('A', 'G', -1),
            ('C', 'T', 0),
        )
        for src, tar, score in wikipedia_cases:
            self.assertEqual(_sim_wikipedia(src, tar), score)

        # Each pair includes 'c', which is absent from alphabet='ab';
        # a ValueError is expected for all of them
        for src, tar in (('abc', 'cba'), ('abc', 'ba'), ('ab', 'cba')):
            with self.assertRaises(ValueError):
                sim_matrix(src, tar, alphabet='ab')
1941
1942
1943
class NeedlemanWunschTestCases(unittest.TestCase):
    """Test Needleman-Wunsch functions.

    abydos.distance.needleman_wunsch
    """

    def test_needleman_wunsch(self):
        """Test abydos.distance.needleman_wunsch."""
        self.assertEqual(needleman_wunsch('', ''), 0)

        # Rows are (src, tar, third argument, similarity function, expected)
        scored_cases = (
            # https://en.wikipedia.org/wiki/Needleman–Wunsch_algorithm
            ('GATTACA', 'GCATGCU', 1, _sim_nw, 0),
            ('AGACTAGTTAC', 'CGAGACGT', 5, _sim_wikipedia, 16),
            # checked against http://ds9a.nl/nwunsch/
            # (mismatch=1, gap=5, skew=5)
            ('CGATATCAG', 'TGACGSTGC', 5, _sim_nw, -5),
            ('AGACTAGTTAC', 'TGACGSTGC', 5, _sim_nw, -7),
            ('AGACTAGTTAC', 'CGAGACGT', 5, _sim_nw, -15),
        )
        for src, tar, gap, sim_func, score in scored_cases:
            self.assertEqual(needleman_wunsch(src, tar, gap, sim_func),
                             score)

    def test_needleman_wunsch_nialls(self):
        """Test abydos.distance.needleman_wunsch (Nialls set)."""
        # checked against http://ds9a.nl/nwunsch/ (mismatch=1, gap=2, skew=2)
        nw_vals = (5, 0, -2, 3, 1, 1, -2, -2, -1, -3, -3, -5, -3, -7, -7, -19)
        # Every NIALL entry is aligned against the first one
        for pos, name in enumerate(NIALL):
            score = needleman_wunsch(NIALL[0], name, 2, _sim_nw)
            self.assertEqual(score, nw_vals[pos])
1974
1975
1976
class SmithWatermanTestCases(unittest.TestCase):
    """Test Smith-Waterman functions.

    abydos.distance.smith_waterman
    """

    def test_smith_waterman(self):
        """Test abydos.distance.smith_waterman."""
        self.assertEqual(smith_waterman('', ''), 0)

        # Rows are (src, tar, third argument, similarity function, expected)
        scored_cases = (
            # https://en.wikipedia.org/wiki/Needleman–Wunsch_algorithm
            ('GATTACA', 'GCATGCU', 1, _sim_nw, 0),
            ('AGACTAGTTAC', 'CGAGACGT', 5, _sim_wikipedia, 26),
            ('CGATATCAG', 'TGACGSTGC', 5, _sim_nw, 0),
            ('AGACTAGTTAC', 'TGACGSTGC', 5, _sim_nw, 1),
            ('AGACTAGTTAC', 'CGAGACGT', 5, _sim_nw, 0),
        )
        for src, tar, gap, sim_func, score in scored_cases:
            self.assertEqual(smith_waterman(src, tar, gap, sim_func), score)

    def test_smith_waterman_nialls(self):
        """Test abydos.distance.smith_waterman (Nialls set)."""
        sw_vals = (5, 1, 1, 3, 2, 1, 1, 0, 0, 1, 1, 2, 2, 1, 0, 0)
        # Every NIALL entry is aligned against the first one
        for pos, name in enumerate(NIALL):
            score = smith_waterman(NIALL[0], name, 2, _sim_nw)
            self.assertEqual(score, sw_vals[pos])
2005
2006
2007
class GotohTestCases(unittest.TestCase):
    """Test Gotoh functions.

    abydos.distance.gotoh
    """

    def test_gotoh(self):
        """Test abydos.distance.gotoh.

        For each pair, gotoh is first called with its third and fourth
        arguments equal and compared with a fixed score; it is then called
        with a smaller fourth argument and must score at least as high as
        needleman_wunsch on the same input.
        """
        self.assertEqual(gotoh('', ''), 0)

        # Rows are (src, tar, third argument, smaller fourth argument,
        #           similarity function, expected score)
        scored_cases = (
            # https://en.wikipedia.org/wiki/Needleman–Wunsch_algorithm
            ('GATTACA', 'GCATGCU', 1, 0.5, _sim_nw, 0),
            ('AGACTAGTTAC', 'CGAGACGT', 5, 2, _sim_wikipedia, 16),
            # checked against http://ds9a.nl/nwunsch/
            # (mismatch=1, gap=5, skew=5)
            ('CGATATCAG', 'TGACGSTGC', 5, 2, _sim_nw, -5),
            ('AGACTAGTTAC', 'TGACGSTGC', 5, 2, _sim_nw, -7),
            ('AGACTAGTTAC', 'CGAGACGT', 5, 2, _sim_nw, -15),
        )
        for src, tar, gap, lower, sim_func, score in scored_cases:
            self.assertEqual(gotoh(src, tar, gap, gap, sim_func), score)
            self.assertGreaterEqual(
                gotoh(src, tar, gap, lower, sim_func),
                needleman_wunsch(src, tar, gap, sim_func))

    def test_gotoh_nialls(self):
        """Test abydos.distance.gotoh (Nialls set)."""
        # checked against http://ds9a.nl/nwunsch/ (mismatch=1, gap=2, skew=2)
        nw_vals = (5, 0, -2, 3, 1, 1, -2, -2, -1, -3, -3, -5, -3, -7, -7, -19)
        for pos, name in enumerate(NIALL):
            self.assertEqual(gotoh(NIALL[0], name, 2, 2, _sim_nw),
                             nw_vals[pos])

        nw_vals2 = (5, 0, -2, 3, 1, 1, -2, -2, -1, -2, -3, -3, -2, -6, -6, -8)
        for pos, name in enumerate(NIALL):
            self.assertEqual(gotoh(NIALL[0], name, 2, 1, _sim_nw),
                             nw_vals2[pos])
            # With fourth argument 0.5, gotoh must not score below
            # needleman_wunsch on the same pair
            self.assertGreaterEqual(
                gotoh(NIALL[0], name, 2, 0.5, _sim_nw),
                needleman_wunsch(NIALL[0], name, 2, _sim_nw))
2059
2060
2061
class LengthTestCases(unittest.TestCase):
2062
    """Test length similarity functions.
2063
2064
    abydos.distance.sim_length & .dist_length
2065
    """
2066
2067
    def test_sim_ident(self):
2068
        """Test abydos.distance.sim_length."""
2069
        self.assertEqual(sim_length('', ''), 1)
2070
        self.assertEqual(sim_length('', 'a'), 0)
2071
        self.assertEqual(sim_length('a', ''), 0)
2072
        self.assertEqual(sim_length('a', 'a'), 1)
2073
        self.assertEqual(sim_length('abcd', 'abcd'), 1)
2074
        self.assertEqual(sim_length('abcd', 'dcba'), 1)
2075
        self.assertEqual(sim_length('abc', 'cba'), 1)
2076
        self.assertEqual(sim_length('abc', 'dcba'), 0.75)
2077
        self.assertEqual(sim_length('abcd', 'cba'), 0.75)
2078
        self.assertEqual(sim_length('ab', 'dcba'), 0.5)
2079
        self.assertEqual(sim_length('abcd', 'ba'), 0.5)
2080
2081
    def test_dist_ident(self):
2082
        """Test abydos.distance.dist_length."""
2083
        self.assertEqual(dist_length('', ''), 0)
2084
        self.assertEqual(dist_length('', 'a'), 1)
2085
        self.assertEqual(dist_length('a', ''), 1)
2086
        self.assertEqual(dist_length('a', 'a'), 0)
2087
        self.assertEqual(dist_length('abcd', 'abcd'), 0)
2088
        self.assertEqual(dist_length('abcd', 'dcba'), 0)
2089
        self.assertEqual(dist_length('abc', 'cba'), 0)
2090
        self.assertEqual(dist_length('abc', 'dcba'), 0.25)
2091
        self.assertEqual(dist_length('abcd', 'cba'), 0.25)
2092
        self.assertEqual(dist_length('ab', 'dcba'), 0.5)
2093
        self.assertEqual(dist_length('abcd', 'ba'), 0.5)
2094
2095
2096
class PrefixTestCases(unittest.TestCase):
    """Test prefix similarity functions.

    abydos.distance.sim_prefix & .dist_prefix
    """

    def test_sim_prefix(self):
        """Test abydos.distance.sim_prefix."""
        # (src, tar, expected) compared with assertEqual
        exact_cases = (
            ('', '', 1),
            ('a', '', 0),
            ('', 'a', 0),
            ('a', 'a', 1),
            ('ax', 'a', 1),
            ('axx', 'a', 1),
            ('ax', 'ay', 1/2),
            ('a', 'ay', 1),
            ('a', 'ayy', 1),
            ('ax', 'ay', 1/2),
            ('a', 'y', 0),
            ('y', 'a', 0),
            ('aaax', 'aaa', 1),
            ('aaxx', 'aayy', 1/2),
            ('xxaa', 'yyaa', 0),
            ('aaxxxx', 'aayyy', 2/5),
            ('xa', 'a', 0),
            ('xxa', 'a', 0),
            ('xa', 'ya', 0),
            ('a', 'ya', 0),
            ('a', 'yya', 0),
            ('xa', 'ya', 0),
            ('xaaa', 'aaa', 0),
            ('xxxa', 'aaa', 0),
            ('xxxaa', 'yaa', 0),
            ('xxxxaa', 'yyyaa', 0),
        )
        for src, tar, score in exact_cases:
            self.assertEqual(sim_prefix(src, tar), score)

        # (src, tar, expected) compared with assertAlmostEqual
        approximate_cases = (
            ('axxx', 'aaa', 1/3),
            ('aaxxx', 'aay', 2/3),
        )
        for src, tar, score in approximate_cases:
            self.assertAlmostEqual(sim_prefix(src, tar), score)

    def test_dist_prefix(self):
        """Test abydos.distance.dist_prefix."""
        # (src, tar, expected) compared with assertEqual
        exact_cases = (
            ('', '', 0),
            ('a', '', 1),
            ('', 'a', 1),
            ('a', 'a', 0),
            ('ax', 'a', 0),
            ('axx', 'a', 0),
            ('ax', 'ay', 1/2),
            ('a', 'ay', 0),
            ('a', 'ayy', 0),
            ('ax', 'ay', 1/2),
            ('a', 'y', 1),
            ('y', 'a', 1),
            ('aaax', 'aaa', 0),
            ('aaxx', 'aayy', 1/2),
            ('xxaa', 'yyaa', 1),
            ('aaxxxx', 'aayyy', 3/5),
            ('xa', 'a', 1),
            ('xxa', 'a', 1),
            ('xa', 'ya', 1),
            ('a', 'ya', 1),
            ('a', 'yya', 1),
            ('xa', 'ya', 1),
            ('xaaa', 'aaa', 1),
            ('xxxa', 'aaa', 1),
            ('xxxaa', 'yaa', 1),
            ('xxxxaa', 'yyyaa', 1),
        )
        for src, tar, score in exact_cases:
            self.assertEqual(dist_prefix(src, tar), score)

        # (src, tar, expected) compared with assertAlmostEqual
        approximate_cases = (
            ('axxx', 'aaa', 2/3),
            ('aaxxx', 'aay', 1/3),
        )
        for src, tar, score in approximate_cases:
            self.assertAlmostEqual(dist_prefix(src, tar), score)
2163
2164
2165
class SuffixTestCases(unittest.TestCase):
    """Test suffix similarity functions.

    abydos.distance.sim_suffix & .dist_suffix
    """

    def test_sim_suffix(self):
        """Test abydos.distance.sim_suffix."""
        # (src, tar, expected) compared with assertEqual
        exact_cases = (
            ('', '', 1),
            ('a', '', 0),
            ('', 'a', 0),
            ('a', 'a', 1),
            ('ax', 'a', 0),
            ('axx', 'a', 0),
            ('ax', 'ay', 0),
            ('a', 'ay', 0),
            ('a', 'ayy', 0),
            ('ax', 'ay', 0),
            ('a', 'y', 0),
            ('y', 'a', 0),
            ('aaax', 'aaa', 0),
            ('axxx', 'aaa', 0),
            ('aaxx', 'aayy', 0),
            ('xxaa', 'yyaa', 1/2),
            ('aaxxx', 'aay', 0),
            ('aaxxxx', 'aayyy', 0),
            ('xa', 'a', 1),
            ('xxa', 'a', 1),
            ('xa', 'ya', 1/2),
            ('a', 'ya', 1),
            ('a', 'yya', 1),
            ('xa', 'ya', 1/2),
            ('xaaa', 'aaa', 1),
            ('xxxxaa', 'yyyaa', 2/5),
        )
        for src, tar, score in exact_cases:
            self.assertEqual(sim_suffix(src, tar), score)

        # (src, tar, expected) compared with assertAlmostEqual
        approximate_cases = (
            ('xxxa', 'aaa', 1/3),
            ('xxxaa', 'yaa', 2/3),
        )
        for src, tar, score in approximate_cases:
            self.assertAlmostEqual(sim_suffix(src, tar), score)

    def test_dist_suffix(self):
        """Test abydos.distance.dist_suffix."""
        # (src, tar, expected) compared with assertEqual
        exact_cases = (
            ('', '', 0),
            ('a', '', 1),
            ('', 'a', 1),
            ('a', 'a', 0),
            ('ax', 'a', 1),
            ('axx', 'a', 1),
            ('ax', 'ay', 1),
            ('a', 'ay', 1),
            ('a', 'ayy', 1),
            ('ax', 'ay', 1),
            ('a', 'y', 1),
            ('y', 'a', 1),
            ('aaax', 'aaa', 1),
            ('axxx', 'aaa', 1),
            ('aaxx', 'aayy', 1),
            ('xxaa', 'yyaa', 1/2),
            ('aaxxx', 'aay', 1),
            ('aaxxxx', 'aayyy', 1),
            ('xa', 'a', 0),
            ('xxa', 'a', 0),
            ('xa', 'ya', 1/2),
            ('a', 'ya', 0),
            ('a', 'yya', 0),
            ('xa', 'ya', 1/2),
            ('xaaa', 'aaa', 0),
            ('xxxxaa', 'yyyaa', 3/5),
        )
        for src, tar, score in exact_cases:
            self.assertEqual(dist_suffix(src, tar), score)

        # (src, tar, expected) compared with assertAlmostEqual
        approximate_cases = (
            ('xxxa', 'aaa', 2/3),
            ('xxxaa', 'yaa', 1/3),
        )
        for src, tar, score in approximate_cases:
            self.assertAlmostEqual(dist_suffix(src, tar), score)
2232
2233
2234
class MLIPNSTestCases(unittest.TestCase):
    """Test MLIPNS functions.

    abydos.distance.sim_mlipns & .dist_mlipns
    """

    def test_sim_mlipns(self):
        """Test abydos.distance.sim_mlipns."""
        # (src, tar, expected similarity)
        expectations = (
            ('', '', 1),
            ('a', '', 0),
            ('', 'a', 0),
            ('a', 'a', 1),
            ('ab', 'a', 1),
            ('abc', 'abc', 1),
            ('abc', 'abcde', 1),
            ('abcg', 'abcdeg', 1),
            ('abcg', 'abcdefg', 0),
            ('Tomato', 'Tamato', 1),
            ('ato', 'Tam', 1),
        )
        for src, tar, score in expectations:
            self.assertEqual(sim_mlipns(src, tar), score)

    def test_dist_mlipns(self):
        """Test abydos.distance.dist_mlipns."""
        # (src, tar, expected distance)
        expectations = (
            ('', '', 0),
            ('a', '', 1),
            ('', 'a', 1),
            ('a', 'a', 0),
            ('ab', 'a', 0),
            ('abc', 'abc', 0),
            ('abc', 'abcde', 0),
            ('abcg', 'abcdeg', 0),
            ('abcg', 'abcdefg', 1),
            ('Tomato', 'Tamato', 0),
            ('ato', 'Tam', 0),
        )
        for src, tar, score in expectations:
            self.assertEqual(dist_mlipns(src, tar), score)
2267
2268
2269
class BagTestCases(unittest.TestCase):
    """Test bag similarity functions.

    abydos.distance.bag, .sim_bag & .dist_bag
    """

    def test_bag(self):
        """Test abydos.distance.bag."""
        # (src, tar, expected bag distance)
        expectations = (
            ('', '', 0),
            ('nelson', '', 6),
            ('', 'neilsen', 7),
            ('ab', 'a', 1),
            ('ab', 'c', 2),
            ('nelson', 'neilsen', 2),
            ('neilsen', 'nelson', 2),
            ('niall', 'neal', 2),
            ('aluminum', 'Catalan', 5),
            ('abcdefg', 'hijklm', 7),
            ('abcdefg', 'hijklmno', 8),
        )
        for src, tar, score in expectations:
            self.assertEqual(bag(src, tar), score)

    def test_sim_bag(self):
        """Test abydos.distance.sim_bag."""
        # (src, tar, expected) compared with assertEqual
        exact_cases = (
            ('', '', 1),
            ('nelson', '', 0),
            ('', 'neilsen', 0),
            ('ab', 'a', 0.5),
            ('ab', 'c', 0),
            ('abcdefg', 'hijklm', 0),
            ('abcdefg', 'hijklmno', 0),
        )
        for src, tar, score in exact_cases:
            self.assertEqual(sim_bag(src, tar), score)

        # (src, tar, expected) compared with assertAlmostEqual
        approximate_cases = (
            ('nelson', 'neilsen', 5/7),
            ('neilsen', 'nelson', 5/7),
            ('niall', 'neal', 3/5),
            ('aluminum', 'Catalan', 3/8),
        )
        for src, tar, score in approximate_cases:
            self.assertAlmostEqual(sim_bag(src, tar), score)

    def test_dist_bag(self):
        """Test abydos.distance.dist_bag."""
        # (src, tar, expected) compared with assertEqual
        exact_cases = (
            ('', '', 0),
            ('nelson', '', 1),
            ('', 'neilsen', 1),
            ('ab', 'a', 0.5),
            ('ab', 'c', 1),
            ('abcdefg', 'hijklm', 1),
            ('abcdefg', 'hijklmno', 1),
        )
        for src, tar, score in exact_cases:
            self.assertEqual(dist_bag(src, tar), score)

        # (src, tar, expected) compared with assertAlmostEqual
        approximate_cases = (
            ('nelson', 'neilsen', 2/7),
            ('neilsen', 'nelson', 2/7),
            ('niall', 'neal', 2/5),
            ('aluminum', 'Catalan', 5/8),
        )
        for src, tar, score in approximate_cases:
            self.assertAlmostEqual(dist_bag(src, tar), score)
2316
2317
2318
class EditexTestCases(unittest.TestCase):
    """Test Editex functions.

    abydos.distance.editex, .sim_editex & .dist_editex
    """

    def test_editex(self):
        """Test abydos.distance.editex."""
        # (src, tar, expected editex value)
        expectations = (
            ('', '', 0),
            ('nelson', '', 12),
            ('', 'neilsen', 14),
            ('ab', 'a', 2),
            ('ab', 'c', 4),
            ('nelson', 'neilsen', 2),
            ('neilsen', 'nelson', 2),
            ('niall', 'neal', 1),
            ('neal', 'niall', 1),
            ('niall', 'nihal', 2),
            ('nihal', 'niall', 2),
            ('neal', 'nihl', 3),
            ('nihl', 'neal', 3),
        )
        for src, tar, score in expectations:
            self.assertEqual(editex(src, tar), score)

    def test_editex_local(self):
        """Test abydos.distance.editex (local variant)."""
        # (src, tar, expected editex value with local=True)
        expectations = (
            ('', '', 0),
            ('nelson', '', 12),
            ('', 'neilsen', 14),
            ('ab', 'a', 2),
            ('ab', 'c', 2),
            ('nelson', 'neilsen', 2),
            ('neilsen', 'nelson', 2),
            ('niall', 'neal', 1),
            ('neal', 'niall', 1),
            ('niall', 'nihal', 2),
            ('nihal', 'niall', 2),
            ('neal', 'nihl', 3),
            ('nihl', 'neal', 3),
        )
        for src, tar, score in expectations:
            self.assertEqual(editex(src, tar, local=True), score)

    def test_sim_editex(self):
        """Test abydos.distance.sim_editex."""
        # (src, tar, expected) compared with assertEqual
        exact_cases = (
            ('', '', 1),
            ('nelson', '', 0),
            ('', 'neilsen', 0),
            ('ab', 'a', 0.5),
            ('ab', 'c', 0),
            ('niall', 'neal', 0.9),
        )
        for src, tar, score in exact_cases:
            self.assertEqual(sim_editex(src, tar), score)

        # (src, tar, expected) compared with assertAlmostEqual
        approximate_cases = (
            ('nelson', 'neilsen', 12/14),
            ('neilsen', 'nelson', 12/14),
        )
        for src, tar, score in approximate_cases:
            self.assertAlmostEqual(sim_editex(src, tar), score)

    def test_dist_editex(self):
        """Test abydos.distance.dist_editex."""
        # (src, tar, expected) compared with assertEqual
        exact_cases = (
            ('', '', 0),
            ('nelson', '', 1),
            ('', 'neilsen', 1),
            ('ab', 'a', 0.5),
            ('ab', 'c', 1),
            ('niall', 'neal', 0.1),
        )
        for src, tar, score in exact_cases:
            self.assertEqual(dist_editex(src, tar), score)

        # (src, tar, expected) compared with assertAlmostEqual
        approximate_cases = (
            ('nelson', 'neilsen', 2/14),
            ('neilsen', 'nelson', 2/14),
        )
        for src, tar, score in approximate_cases:
            self.assertAlmostEqual(dist_editex(src, tar), score)
2377
2378
2379
class EudexTestCases(unittest.TestCase):
2380
    """Test Eudex distance functions.
2381
2382
    abydos.distance.eudex_hamming, dist_eudex, & sim_eudex
2383
    """
2384
2385
    def test_eudex_hamming(self):
2386
        """Test abydos.distance.eudex_hamming."""
2387
        # Base cases
2388
        self.assertEqual(eudex_hamming('', ''), 0)
2389
        self.assertEqual(eudex_hamming('', '', None), 0)
2390
        self.assertEqual(eudex_hamming('', '', 'fibonacci'), 0)
2391
        self.assertEqual(eudex_hamming('', '', [10, 1, 1, 1]), 0)
2392
        self.assertEqual(eudex_hamming('', '',
2393
                                       lambda: [(yield 1) for _
2394
                                                in range(10)]), 0)
2395
        self.assertEqual(eudex_hamming('', '', normalized=True), 0)
2396
2397
        self.assertEqual(eudex_hamming('Niall', 'Niall'), 0)
2398
        self.assertEqual(eudex_hamming('Niall', 'Niall', None), 0)
2399
        self.assertEqual(eudex_hamming('Niall', 'Niall', 'fibonacci'), 0)
2400
        self.assertEqual(eudex_hamming('Niall', 'Niall', [10, 1, 1, 1]), 0)
2401
        self.assertEqual(eudex_hamming('Niall', 'Niall',
2402
                                       lambda: [(yield 1) for _
2403
                                                in range(10)]), 0)
2404
        self.assertEqual(eudex_hamming('Niall', 'Niall', normalized=True), 0)
2405
2406
        self.assertEqual(eudex_hamming('Niall', 'Neil'), 2)
2407
        self.assertEqual(eudex_hamming('Niall', 'Neil', None), 1)
2408
        self.assertEqual(eudex_hamming('Niall', 'Neil', 'fibonacci'), 2)
2409
        self.assertEqual(eudex_hamming('Niall', 'Neil', [10, 1, 1, 1]), 1)
2410
        self.assertEqual(eudex_hamming('Niall', 'Neil',
2411
                                       lambda: [(yield 1) for _
2412
                                                in range(10)]), 1)
2413
        self.assertAlmostEqual(eudex_hamming('Niall', 'Neil', normalized=True),
2414
                               0.00098039)
2415
2416
        self.assertEqual(eudex_hamming('Niall', 'Colin'), 524)
2417
        self.assertEqual(eudex_hamming('Niall', 'Colin', None), 10)
2418
        self.assertEqual(eudex_hamming('Niall', 'Colin', 'fibonacci'), 146)
2419
        self.assertEqual(eudex_hamming('Niall', 'Colin', [10, 1, 1, 1]), 6)
2420
        self.assertEqual(eudex_hamming('Niall', 'Colin',
2421
                                       lambda: [(yield 1) for _
2422
                                                in range(10)]), 10)
2423
        self.assertAlmostEqual(eudex_hamming('Niall', 'Colin',
2424
                                             normalized=True), 0.25686274)
2425
2426
    def test_dist_eudex(self):
2427
        """Test abydos.distance.dist_eudex."""
2428
        # Base cases
2429
        self.assertEqual(dist_eudex('', ''), 0)
2430
        self.assertEqual(dist_eudex('', '', None), 0)
2431
        self.assertEqual(dist_eudex('', '', 'fibonacci'), 0)
2432
2433
        self.assertEqual(dist_eudex('Niall', 'Niall'), 0)
2434
        self.assertEqual(dist_eudex('Niall', 'Niall', None), 0)
2435
        self.assertEqual(dist_eudex('Niall', 'Niall', 'fibonacci'), 0)
2436
2437
        self.assertAlmostEqual(dist_eudex('Niall', 'Neil'), 0.00098039)
2438
        self.assertAlmostEqual(dist_eudex('Niall', 'Neil', None), 0.11111111)
2439
        self.assertAlmostEqual(dist_eudex('Niall', 'Neil', 'fibonacci'),
2440
                               0.00287356)
2441
2442
        self.assertAlmostEqual(dist_eudex('Niall', 'Colin'), 0.25686275)
2443
        self.assertAlmostEqual(dist_eudex('Niall', 'Colin', None), 0.16666667)
2444
        self.assertAlmostEqual(dist_eudex('Niall', 'Colin', 'fibonacci'),
2445
                               0.20977011)
2446
2447
    def test_sim_eudex(self):
2448
        """Test abydos.distance.sim_eudex."""
2449
        # Base cases
2450
        self.assertEqual(sim_eudex('', ''), 1)
2451
        self.assertEqual(sim_eudex('', '', None), 1)
2452
        self.assertEqual(sim_eudex('', '', 'fibonacci'), 1)
2453
2454
        self.assertEqual(sim_eudex('Niall', 'Niall'), 1)
2455
        self.assertEqual(sim_eudex('Niall', 'Niall', None), 1)
2456
        self.assertEqual(sim_eudex('Niall', 'Niall', 'fibonacci'), 1)
2457
2458
        self.assertAlmostEqual(sim_eudex('Niall', 'Neil'), 0.99901961)
2459
        self.assertAlmostEqual(sim_eudex('Niall', 'Neil', None), 0.88888889)
2460
        self.assertAlmostEqual(sim_eudex('Niall', 'Neil', 'fibonacci'),
2461
                               0.99712644)
2462
2463
        self.assertAlmostEqual(sim_eudex('Niall', 'Colin'), 0.74313725)
2464
        self.assertAlmostEqual(sim_eudex('Niall', 'Colin', None), 0.83333333)
2465
        self.assertAlmostEqual(sim_eudex('Niall', 'Colin', 'fibonacci'),
2466
                               0.79022989)
2467
2468
2469
class Sift4TestCases(unittest.TestCase):
    """Test Sift4 functions.

    abydos.distance.sift4_simplest, sift4_common, dist_sift4, & sim_sift4
    """

    # Sentence pairs used by the java-string-similarity derived cases.
    # They are defined once here because all four test methods reuse them.
    _SENTENCE_SRC = 'This is the first string'
    _SENTENCE_TAR = 'And this is another string'
    _LOREM_SRC = ('Lorem ipsum dolor sit amet, ' +
                  'consectetur adipiscing elit.')
    _LOREM_TAR = ('Amet Lorm ispum dolor sit amet, ' +
                  'consetetur adixxxpiscing elit.')

    def test_sift4_simplest(self):
        """Test abydos.distance.sift4_simplest."""
        # tests copied from Lukas Benedix's post at
        # https://siderite.blogspot.com/2014/11/super-fast-and-accurate-string-distance.html

        # One or both strings empty
        self.assertEqual(sift4_simplest('', ''), 0)
        self.assertEqual(sift4_simplest('a', ''), 1)
        self.assertEqual(sift4_simplest('', 'a'), 1)
        self.assertEqual(sift4_simplest('abc', ''), 3)
        self.assertEqual(sift4_simplest('', 'abc'), 3)

        # Identical strings
        self.assertEqual(sift4_simplest('a', 'a'), 0)
        self.assertEqual(sift4_simplest('abc', 'abc'), 0)

        # Second string has extra characters
        self.assertEqual(sift4_simplest('a', 'ab'), 1)
        self.assertEqual(sift4_simplest('ac', 'abc'), 1)
        self.assertEqual(sift4_simplest('abcdefg', 'xabxcdxxefxgx'), 10)

        # First string has extra characters
        self.assertEqual(sift4_simplest('ab', 'b'), 1)
        self.assertEqual(sift4_simplest('ab', 'a'), 1)
        self.assertEqual(sift4_simplest('abc', 'ac'), 1)
        self.assertEqual(sift4_simplest('xabxcdxxefxgx', 'abcdefg'), 10)

        # Equal-length strings with differing characters
        self.assertEqual(sift4_simplest('a', 'b'), 1)
        self.assertEqual(sift4_simplest('ab', 'ac'), 1)
        self.assertEqual(sift4_simplest('ac', 'bc'), 1)
        self.assertEqual(sift4_simplest('abc', 'axc'), 1)
        self.assertEqual(sift4_simplest('xabxcdxxefxgx', '1ab2cd34ef5g6'), 6)

        # Whole-word comparisons
        self.assertEqual(sift4_simplest('example', 'samples'), 2)
        self.assertEqual(sift4_simplest('sturgeon', 'urgently'), 4)
        self.assertEqual(sift4_simplest('levenshtein', 'frankenstein'), 10)
        self.assertEqual(sift4_simplest('distance', 'difference'), 7)

        # Tests copied from
        # https://github.com/tdebatty/java-string-similarity/blob/master/src/test/java/info/debatty/java/stringsimilarity/experimental/Sift4Test.java
        self.assertEqual(sift4_simplest(self._SENTENCE_SRC,
                                        self._SENTENCE_TAR, 5), 13)
        self.assertEqual(sift4_simplest(self._LOREM_SRC,
                                        self._LOREM_TAR, 10), 20)

    def test_sift4_common(self):
        """Test abydos.distance.sift4_common."""
        # tests copied from Lukas Benedix's post at
        # https://siderite.blogspot.com/2014/11/super-fast-and-accurate-string-distance.html

        # One or both strings empty
        self.assertEqual(sift4_common('', ''), 0)
        self.assertEqual(sift4_common('a', ''), 1)
        self.assertEqual(sift4_common('', 'a'), 1)
        self.assertEqual(sift4_common('abc', ''), 3)
        self.assertEqual(sift4_common('', 'abc'), 3)

        # Identical strings
        self.assertEqual(sift4_common('a', 'a'), 0)
        self.assertEqual(sift4_common('abc', 'abc'), 0)

        # Second string has extra characters
        self.assertEqual(sift4_common('a', 'ab'), 1)
        self.assertEqual(sift4_common('ac', 'abc'), 1)
        self.assertEqual(sift4_common('abcdefg', 'xabxcdxxefxgx'), 7)

        # First string has extra characters
        self.assertEqual(sift4_common('ab', 'b'), 1)
        self.assertEqual(sift4_common('ab', 'a'), 1)
        self.assertEqual(sift4_common('abc', 'ac'), 1)
        self.assertEqual(sift4_common('xabxcdxxefxgx', 'abcdefg'), 7)

        # Equal-length strings with differing characters
        self.assertEqual(sift4_common('a', 'b'), 1)
        self.assertEqual(sift4_common('ab', 'ac'), 1)
        self.assertEqual(sift4_common('ac', 'bc'), 1)
        self.assertEqual(sift4_common('abc', 'axc'), 1)
        self.assertEqual(sift4_common('xabxcdxxefxgx', '1ab2cd34ef5g6'), 6)

        # Whole-word comparisons
        self.assertEqual(sift4_common('example', 'samples'), 2)
        self.assertEqual(sift4_common('sturgeon', 'urgently'), 3)
        self.assertEqual(sift4_common('levenshtein', 'frankenstein'), 6)
        self.assertEqual(sift4_common('distance', 'difference'), 5)

        # Tests copied from
        # https://github.com/tdebatty/java-string-similarity/blob/master/src/test/java/info/debatty/java/stringsimilarity/experimental/Sift4Test.java
        self.assertEqual(sift4_common(self._SENTENCE_SRC,
                                      self._SENTENCE_TAR, 5), 11)
        self.assertEqual(sift4_common(self._LOREM_SRC,
                                      self._LOREM_TAR, 10), 12)

        # cases with max_distance
        self.assertEqual(sift4_common('example', 'samples', 5, 5), 5)
        self.assertEqual(sift4_common('sturgeon', 'urgently', 5, 5), 5)
        self.assertEqual(sift4_common('levenshtein', 'frankenstein', 5, 5), 5)
        self.assertEqual(sift4_common('distance', 'difference', 5, 5), 5)

    def test_dist_sift4(self):
        """Test abydos.distance.dist_sift4."""
        # tests copied from Lukas Benedix's post at
        # https://siderite.blogspot.com/2014/11/super-fast-and-accurate-string-distance.html

        # One or both strings empty
        self.assertEqual(dist_sift4('', ''), 0)
        self.assertEqual(dist_sift4('a', ''), 1)
        self.assertEqual(dist_sift4('', 'a'), 1)
        self.assertEqual(dist_sift4('abc', ''), 1)
        self.assertEqual(dist_sift4('', 'abc'), 1)

        # Identical strings
        self.assertEqual(dist_sift4('a', 'a'), 0)
        self.assertEqual(dist_sift4('abc', 'abc'), 0)

        # Second string has extra characters
        self.assertEqual(dist_sift4('a', 'ab'), 0.5)
        self.assertEqual(dist_sift4('ac', 'abc'), 1/3)
        self.assertAlmostEqual(dist_sift4('abcdefg', 'xabxcdxxefxgx'),
                               0.538461538)

        # First string has extra characters
        self.assertEqual(dist_sift4('ab', 'b'), 0.5)
        self.assertEqual(dist_sift4('ab', 'a'), 0.5)
        self.assertEqual(dist_sift4('abc', 'ac'), 1/3)
        self.assertAlmostEqual(dist_sift4('xabxcdxxefxgx', 'abcdefg'),
                               0.538461538)

        # Equal-length strings with differing characters
        self.assertEqual(dist_sift4('a', 'b'), 1)
        self.assertEqual(dist_sift4('ab', 'ac'), 0.5)
        self.assertEqual(dist_sift4('ac', 'bc'), 0.5)
        self.assertEqual(dist_sift4('abc', 'axc'), 1/3)
        self.assertAlmostEqual(dist_sift4('xabxcdxxefxgx', '1ab2cd34ef5g6'),
                               0.461538461)

        # Whole-word comparisons
        self.assertAlmostEqual(dist_sift4('example', 'samples'), 0.285714285)
        self.assertAlmostEqual(dist_sift4('sturgeon', 'urgently'), 0.375)
        self.assertAlmostEqual(dist_sift4('levenshtein', 'frankenstein'), 0.5)
        self.assertAlmostEqual(dist_sift4('distance', 'difference'), 0.5)

        # Tests copied from
        # https://github.com/tdebatty/java-string-similarity/blob/master/src/test/java/info/debatty/java/stringsimilarity/experimental/Sift4Test.java
        self.assertAlmostEqual(dist_sift4(self._SENTENCE_SRC,
                                          self._SENTENCE_TAR, 5),
                               0.423076923)
        self.assertAlmostEqual(dist_sift4(self._LOREM_SRC,
                                          self._LOREM_TAR, 10),
                               0.193548387)

        # cases with max_distance
        self.assertAlmostEqual(dist_sift4('example', 'samples', 5, 5),
                               0.714285714)
        self.assertAlmostEqual(dist_sift4('sturgeon', 'urgently', 5, 5), 0.625)
        self.assertAlmostEqual(dist_sift4('levenshtein', 'frankenstein', 5, 5),
                               0.416666666)
        self.assertAlmostEqual(dist_sift4('distance', 'difference', 5, 5), 0.5)

    def test_sim_sift4(self):
        """Test abydos.distance.sim_sift4."""
        # tests copied from Lukas Benedix's post at
        # https://siderite.blogspot.com/2014/11/super-fast-and-accurate-string-distance.html

        # One or both strings empty
        self.assertEqual(sim_sift4('', ''), 1)
        self.assertEqual(sim_sift4('a', ''), 0)
        self.assertEqual(sim_sift4('', 'a'), 0)
        self.assertEqual(sim_sift4('abc', ''), 0)
        self.assertEqual(sim_sift4('', 'abc'), 0)

        # Identical strings
        self.assertEqual(sim_sift4('a', 'a'), 1)
        self.assertEqual(sim_sift4('abc', 'abc'), 1)

        # Second string has extra characters
        self.assertEqual(sim_sift4('a', 'ab'), 0.5)
        self.assertAlmostEqual(sim_sift4('ac', 'abc'), 2/3)
        self.assertAlmostEqual(sim_sift4('abcdefg', 'xabxcdxxefxgx'),
                               0.461538461)

        # First string has extra characters
        self.assertEqual(sim_sift4('ab', 'b'), 0.5)
        self.assertEqual(sim_sift4('ab', 'a'), 0.5)
        self.assertAlmostEqual(sim_sift4('abc', 'ac'), 2/3)
        self.assertAlmostEqual(sim_sift4('xabxcdxxefxgx', 'abcdefg'),
                               0.461538461)

        # Equal-length strings with differing characters
        self.assertEqual(sim_sift4('a', 'b'), 0)
        self.assertEqual(sim_sift4('ab', 'ac'), 0.5)
        self.assertEqual(sim_sift4('ac', 'bc'), 0.5)
        self.assertAlmostEqual(sim_sift4('abc', 'axc'), 2/3)
        self.assertAlmostEqual(sim_sift4('xabxcdxxefxgx', '1ab2cd34ef5g6'),
                               0.538461538)

        # Whole-word comparisons
        self.assertAlmostEqual(sim_sift4('example', 'samples'), 0.714285714)
        self.assertAlmostEqual(sim_sift4('sturgeon', 'urgently'), 0.625)
        self.assertAlmostEqual(sim_sift4('levenshtein', 'frankenstein'), 0.5)
        self.assertAlmostEqual(sim_sift4('distance', 'difference'), 0.5)

        # Tests copied from
        # https://github.com/tdebatty/java-string-similarity/blob/master/src/test/java/info/debatty/java/stringsimilarity/experimental/Sift4Test.java
        self.assertAlmostEqual(sim_sift4(self._SENTENCE_SRC,
                                         self._SENTENCE_TAR, 5),
                               0.576923077)
        self.assertAlmostEqual(sim_sift4(self._LOREM_SRC,
                                         self._LOREM_TAR, 10),
                               0.806451613)

        # cases with max_distance
        self.assertAlmostEqual(sim_sift4('example', 'samples', 5, 5),
                               0.285714286)
        self.assertAlmostEqual(sim_sift4('sturgeon', 'urgently', 5, 5), 0.375)
        self.assertAlmostEqual(sim_sift4('levenshtein', 'frankenstein', 5, 5),
                               0.583333333)
        self.assertAlmostEqual(sim_sift4('distance', 'difference', 5, 5), 0.5)
2676
2677
2678
class BaystatTestCases(unittest.TestCase):
    """Test Baystat functions.

    abydos.distance.sim_baystat & .dist_baystat
    """

    def test_sim_baystat(self):
        """Test abydos.distance.sim_baystat."""
        # Base cases
        self.assertEqual(sim_baystat('', ''), 1)
        self.assertEqual(sim_baystat('Colin', ''), 0)
        self.assertEqual(sim_baystat('Colin', 'Colin'), 1)

        # Examples given in the paper
        # https://www.statistik.bayern.de/medien/statistik/zensus/zusammenf__hrung_von_datenbest__nden_ohne_numerische_identifikatoren.pdf
        self.assertAlmostEqual(sim_baystat('DRAKOMENA', 'DRAOMINA'), 7/9)
        self.assertAlmostEqual(sim_baystat('RIEKI', 'RILKI'), 4/5)
        self.assertAlmostEqual(sim_baystat('ATANASSIONI', 'ATANASIOU'), 8/11)
        self.assertAlmostEqual(sim_baystat('LIESKOVSKY', 'LIESZKOVSZKY'),
                               10/12)
        self.assertAlmostEqual(sim_baystat('JEANETTE', 'JEANNETTE'), 8/9)
        self.assertAlmostEqual(sim_baystat('JOHANNES', 'JOHAN'), 0.625)
        self.assertAlmostEqual(sim_baystat('JOHANNES', 'HANS'), 0.375)
        self.assertAlmostEqual(sim_baystat('JOHANNES', 'HANNES'), 0.75)
        self.assertAlmostEqual(sim_baystat('ZIMMERMANN', 'SEMMERMANN'), 0.8)
        self.assertAlmostEqual(sim_baystat('ZIMMERMANN', 'ZIMMERER'), 0.6)
        self.assertAlmostEqual(sim_baystat('ZIMMERMANN', 'ZIMMER'), 0.6)

        # Tests to maximize coverage: explicit values for the three
        # optional positional arguments, and a shorter first string
        self.assertAlmostEqual(sim_baystat('ZIMMERMANN', 'SEMMERMANN',
                                           2, 2, 2), 0.8)
        self.assertAlmostEqual(sim_baystat('ZIMMER', 'ZIMMERMANN'), 0.6)

    def test_dist_baystat(self):
        """Test abydos.distance.dist_baystat."""
        # Base cases
        self.assertEqual(dist_baystat('', ''), 0)
        self.assertEqual(dist_baystat('Colin', ''), 1)
        self.assertEqual(dist_baystat('Colin', 'Colin'), 0)

        # Examples given in the paper
        # https://www.statistik.bayern.de/medien/statistik/zensus/zusammenf__hrung_von_datenbest__nden_ohne_numerische_identifikatoren.pdf
        self.assertAlmostEqual(dist_baystat('DRAKOMENA', 'DRAOMINA'), 2/9)
        self.assertAlmostEqual(dist_baystat('RIEKI', 'RILKI'), 1/5)
        self.assertAlmostEqual(dist_baystat('ATANASSIONI', 'ATANASIOU'), 3/11)
        self.assertAlmostEqual(dist_baystat('LIESKOVSKY', 'LIESZKOVSZKY'),
                               2/12)
        self.assertAlmostEqual(dist_baystat('JEANETTE', 'JEANNETTE'), 1/9)
        self.assertAlmostEqual(dist_baystat('JOHANNES', 'JOHAN'), 0.375)
        self.assertAlmostEqual(dist_baystat('JOHANNES', 'HANS'), 0.625)
        self.assertAlmostEqual(dist_baystat('JOHANNES', 'HANNES'), 0.25)
        self.assertAlmostEqual(dist_baystat('ZIMMERMANN', 'SEMMERMANN'), 0.2)
        self.assertAlmostEqual(dist_baystat('ZIMMERMANN', 'ZIMMERER'), 0.4)
        self.assertAlmostEqual(dist_baystat('ZIMMERMANN', 'ZIMMER'), 0.4)
2732
2733
2734
class TypoTestCases(unittest.TestCase):
    """Test Typo functions.

    Covers abydos.distance.typo, sim_typo & .dist_typo
    """

    def test_typo(self):
        """Test abydos.distance.typo."""
        # (src, tar, expected) triples that must match exactly
        exact_cases = (
            # at least one empty string
            ('', '', 0),
            ('', 'typo', 4),
            ('typo', '', 4),
            # four-letter words differing in one or more positions
            ('asdf', 'zxcv', 2),
            ('asdf', 'ASDF', 1),
            ('asdf', 'qsdf', 0.5),
        )
        for src, tar, expected in exact_cases:
            self.assertEqual(typo(src, tar), expected)

        # The same word pair under each named metric
        metric_cases = (
            ('euclidean', 0.70710677),
            ('manhattan', 1),
            ('log-euclidean', 0.4406868),
            ('log-manhattan', 0.54930615),
        )
        for metric, expected in metric_cases:
            self.assertAlmostEqual(typo('asdf', 'asdt', metric=metric),
                                   expected)

        # A target containing 'Ö' is expected to be rejected
        with self.assertRaises(ValueError):
            typo('asdf', 'Ösdf')

    def test_sim_typo(self):
        """Test abydos.distance.sim_typo."""
        # (src, tar, expected) triples that must match exactly
        exact_cases = (
            # at least one empty string
            ('', '', 1),
            ('', 'typo', 0),
            ('typo', '', 0),
            # four-letter words differing in one or more positions
            ('asdf', 'zxcv', 0.5),
            ('asdf', 'ASDF', 0.75),
            ('asdf', 'qsdf', 0.875),
        )
        for src, tar, expected in exact_cases:
            self.assertEqual(sim_typo(src, tar), expected)

        # The same word pair under each named metric
        metric_cases = (
            ('euclidean', 1-(0.70710677/4)),
            ('manhattan', 0.75),
            ('log-euclidean', 1-(0.4406868/4)),
            ('log-manhattan', 1-(0.54930615/4)),
        )
        for metric, expected in metric_cases:
            self.assertAlmostEqual(sim_typo('asdf', 'asdt', metric=metric),
                                   expected)

    def test_dist_typo(self):
        """Test abydos.distance.dist_typo."""
        # (src, tar, expected) triples that must match exactly
        exact_cases = (
            # at least one empty string
            ('', '', 0),
            ('', 'typo', 1),
            ('typo', '', 1),
            # four-letter words differing in one or more positions
            ('asdf', 'zxcv', 0.5),
            ('asdf', 'ASDF', 0.25),
            ('asdf', 'qsdf', 0.125),
        )
        for src, tar, expected in exact_cases:
            self.assertEqual(dist_typo(src, tar), expected)

        # The same word pair under each named metric
        metric_cases = (
            ('euclidean', 0.70710677/4),
            ('manhattan', 0.25),
            ('log-euclidean', 0.4406868/4),
            ('log-manhattan', 0.54930615/4),
        )
        for metric, expected in metric_cases:
            self.assertAlmostEqual(dist_typo('asdf', 'asdt', metric=metric),
                                   expected)
2803
2804
2805
class IndelTestCases(unittest.TestCase):
    """Test indel functions.

    Covers abydos.distance.sim_indel & .dist_indel
    """

    def test_sim_indel(self):
        """Test abydos.distance.sim_indel."""
        # Two empty strings score exactly 1
        self.assertEqual(sim_indel('', ''), 1)

        # Pairs sharing no characters score exactly 0
        disjoint_pairs = (
            ('a', ''),
            ('', 'a'),
            ('abc', ''),
            ('', 'abc'),
            ('abcd', 'efgh'),
        )
        for src, tar in disjoint_pairs:
            self.assertEqual(sim_indel(src, tar), 0)

        # Name pairs, each checked in both argument orders
        name_cases = (
            ('Nigel', 'Niall', 0.6),
            ('Niall', 'Nigel', 0.6),
            ('Colin', 'Coiln', 0.8),
            ('Coiln', 'Colin', 0.8),
        )
        for src, tar, expected in name_cases:
            self.assertAlmostEqual(sim_indel(src, tar), expected)

    def test_dist_indel(self):
        """Test abydos.distance.dist_indel."""
        # Two empty strings are at distance exactly 0
        self.assertEqual(dist_indel('', ''), 0)

        # Pairs sharing no characters are at distance exactly 1
        disjoint_pairs = (
            ('a', ''),
            ('', 'a'),
            ('abc', ''),
            ('', 'abc'),
            ('abcd', 'efgh'),
        )
        for src, tar in disjoint_pairs:
            self.assertEqual(dist_indel(src, tar), 1)

        # Name pairs, each checked in both argument orders
        name_cases = (
            ('Nigel', 'Niall', 0.4),
            ('Niall', 'Nigel', 0.4),
            ('Colin', 'Coiln', 0.2),
            ('Coiln', 'Colin', 0.2),
        )
        for src, tar, expected in name_cases:
            self.assertAlmostEqual(dist_indel(src, tar), expected)
2840
2841
2842
class SynonameTestCases(unittest.TestCase):
    """Test Synoname functions.

    Covers abydos.distance._synoname_strip_punct,
    synoname_word_approximation, & synoname
    """

    def test_synoname_strip_punct(self):
        """Test abydos.distance._synoname_strip_punct."""
        # (input, expected output): empty, punctuation-free, punctuated
        cases = (
            ('', ''),
            ('abcdefg', 'abcdefg'),
            ('a\'b-c,d!e:f%g', 'abcdefg'),
        )
        for raw, stripped in cases:
            self.assertEqual(_synoname_strip_punct(raw), stripped)

    def test_synoname_word_approximation(self):
        """Test abydos.distance.synoname_word_approximation."""
        # Two empty strings
        self.assertEqual(synoname_word_approximation('', ''), 0)

        # Four positional string arguments
        four_arg_score = synoname_word_approximation(
            'di Domenico di Bonaventura', 'di Tomme di Nuto',
            'Cosimo', 'Luca')
        self.assertEqual(four_arg_score, 0.4)

        # Four strings plus a dictionary as the fifth positional argument
        name_info = {
            'gen_conflict': False,
            'roman_conflict': False,
            'src_specials': [(35, 'b'), (35, 'c')],
            'tar_specials': [(35, 'b'), (35, 'c')],
        }
        five_arg_score = synoname_word_approximation(
            'Antonello da Messina', 'Messina', '', 'Antonello da', name_info)
        self.assertEqual(five_arg_score, 0)

    def test_synoname(self):
        """Test abydos.distance.synoname."""
        # Two empty names under different test selections
        self.assertEqual(synoname('', ''), 1)
        self.assertEqual(synoname('', '', tests=['exact']), 1)
        self.assertEqual(synoname('', '', tests=[]), 13)
        self.assertEqual(synoname('', '', tests=['nonsense-test']), 13)
        self.assertEqual(synoname('', '', ret_name=True), 'exact')

        # Test input formats: 3-tuple, '#'-delimited string, and
        # '#'-delimited string with a leading numeric field
        name_parts = ('Brueghel II (the Younger)', 'Pieter', 'Workshop of')
        self.assertEqual(synoname(name_parts, name_parts), 1)

        delimited = '#'.join(name_parts)
        self.assertEqual(synoname(delimited, delimited), 1)
        self.assertEqual(synoname('22#' + delimited, '44#' + delimited), 1)

        # approx_c tests
        src_name = ('Master of Brueghel II (the Younger)', 'Pieter',
                    'Workshop of')
        tar_name = ('Brueghel I (the Elder)', 'Pieter', 'Workshop of')
        self.assertEqual(synoname(src_name, tar_name), 13)

        src_name = ('Master of Brueghel II', 'Pieter', 'Workshop of')
        tar_name = ('Master known as the Brueghel II', 'Pieter',
                    'Workshop of')
        self.assertEqual(synoname(src_name, tar_name), 10)

        # Types 1-12
        def match_type(src_pair, tar_pair):
            """Return synoname's match name for two 2-tuples.

            Each 2-tuple gets an empty third element appended before
            being passed to synoname with ret_name=True.
            """
            return synoname(src_pair + ('',), tar_pair + ('',),
                            ret_name=True)

        # (src, tar, expected match name)
        leading_cases = (
            (('Brueghel', 'Pieter'), ('Brueghel', 'Pieter'), 'exact'),
            (('Brueghel II', 'Pieter'), ('Brueghel I', 'Pieter'),
             'no_match'),
            (('Breghel', 'Pieter'), ('Brueghel', 'Pieter'), 'omission'),
            (('Brueghel', 'Pieter'), ('Breghel', 'Pieter'), 'omission'),
            (('Brueghel', 'Piter'), ('Brueghel', 'Pieter'), 'omission'),
            (('Brueghel', 'Pieter'), ('Brueghel', 'Piter'), 'omission'),
            (('Brughel', 'Pieter'), ('Breghel', 'Pieter'), 'substitution'),
            (('Breughel', 'Peter'), ('Breughel', 'Piter'), 'substitution'),
            (('Brueghel', 'Pieter'), ('Breughel', 'Pieter'),
             'transposition'),
            (('Brueghel', 'Peiter'), ('Brueghel', 'Pieter'),
             'transposition'),
            (('Brueghel:', 'Pieter'), ('Brueghel', 'Pi-eter'),
             'punctuation'),
            (('Brueghel,', 'Pieter'), ('Brueghel', 'Pieter...'),
             'punctuation'),
            (('Seu rat', 'George Pierre'), ('Seu-rat', 'George-Pierre'),
             'punctuation'),
            (('Picasso', ''), ('Picasso', 'Pablo'), 'no_first'),
            (('Pereira', 'I. R.'), ('Pereira', 'Irene Rice'), 'initials'),
        )
        for src_pair, tar_pair, expected in leading_cases:
            self.assertEqual(match_type(src_pair, tar_pair), expected)

        # Pairs that must NOT be reported as an 'initials' match
        not_initials = (
            (('Pereira', 'I. R.'), ('Pereira', 'I. Smith')),
            (('Pereira', 'I. R. S.'), ('Pereira', 'I. S. R.')),
        )
        for src_pair, tar_pair in not_initials:
            self.assertNotEqual(match_type(src_pair, tar_pair), 'initials')

        # (src, tar, expected match name), continued
        trailing_cases = (
            (('de Goya', 'Francisco'), ('de Goya y Lucientes', 'Francisco'),
             'extension'),
            (('Seurat', 'George'), ('Seurat', 'George-Pierre'),
             'extension'),
            (('Gericault', 'Theodore'),
             ('Gericault', 'Jean Louis Andre Theodore'), 'inclusion'),
            (('Dore', 'Gustave'),
             ('Dore', 'Paul Gustave Louis Christophe'), 'inclusion'),
            (('Rosetti', 'Dante Gabriel'),
             ('Rosetti', 'Gabriel Charles Dante'), 'word_approx'),
            (('di Domenico di Bonaventura', 'Cosimo'),
             ('di Tomme di Nuto', 'Luca'), 'no_match'),
            (('Pereira', 'I. R.'), ('Pereira', 'I. Smith'), 'word_approx'),
            (('Antonello da Messina', ''), ('Messina', 'Antonello da'),
             'confusions'),
            (('Brueghel', 'Pietter'), ('Bruegghel', 'Pieter'),
             'char_approx'),
        )
        for src_pair, tar_pair, expected in trailing_cases:
            self.assertEqual(match_type(src_pair, tar_pair), expected)
2994
2995
2996
class SimDistTestCases(unittest.TestCase):
    """Test generic sim & dist functions.

    Covers abydos.distance.sim & .dist
    """

    def test_sim(self):
        """Test abydos.distance.sim."""
        # With no method given, sim must agree with sim_levenshtein
        generic_result = sim('Niall', 'Nigel')
        levenshtein_result = sim_levenshtein('Niall', 'Nigel')
        self.assertEqual(generic_result, levenshtein_result)

        # 0 as the third argument must raise AttributeError
        with self.assertRaises(AttributeError):
            sim('abc', 'abc', 0)

    def test_dist(self):
        """Test abydos.distance.dist."""
        # With no method given, dist must agree with dist_levenshtein
        generic_result = dist('Niall', 'Nigel')
        levenshtein_result = dist_levenshtein('Niall', 'Nigel')
        self.assertEqual(generic_result, levenshtein_result)

        # 0 as the third argument must raise AttributeError
        with self.assertRaises(AttributeError):
            dist('abc', 'abc', 0)
3013
3014
3015
# Run the tests in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()
3017