Completed
Push — master ( c4f92b...0bbb8a )
by Chris
12:28
created

BaystatTestCases.test_sim_baystat()   A

Complexity

Conditions 1

Size

Total Lines 21
Code Lines 16

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 16
nop 1
dl 0
loc 21
rs 9.6
c 0
b 0
f 0
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2014-2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19
"""abydos.tests.test_distance.

This module contains unit tests for abydos.distance.
"""
23
24
from __future__ import division, unicode_literals
25
26
import math
27
import os
28
import unittest
29
from difflib import SequenceMatcher
30
31
from abydos.compression import ac_train
32
from abydos.distance import bag, damerau_levenshtein, dist, dist_bag, \
33
    dist_baystat, dist_compression, dist_cosine, dist_damerau, dist_dice, \
34
    dist_editex, dist_hamming, dist_ident, dist_jaccard, dist_jaro_winkler, \
35
    dist_lcsseq,  dist_lcsstr, dist_length, dist_levenshtein, dist_mlipns, \
36
    dist_monge_elkan, dist_mra, dist_overlap, dist_prefix, \
37
    dist_ratcliff_obershelp, dist_strcmp95, dist_suffix, dist_tversky, \
38
    editex, gotoh, hamming, lcsseq, lcsstr, levenshtein, mra_compare, \
39
    needleman_wunsch, sim, sim_bag, sim_baystat, sim_compression, sim_cosine, \
40
    sim_damerau, sim_dice, sim_editex, sim_hamming, sim_ident, sim_jaccard, \
41
    sim_jaro_winkler, sim_lcsseq, sim_lcsstr, sim_length, sim_levenshtein, \
42
    sim_matrix, sim_mlipns, sim_monge_elkan, sim_mra, sim_overlap, \
43
    sim_prefix, sim_ratcliff_obershelp, sim_strcmp95, sim_suffix, \
44
    sim_tanimoto, sim_tversky, smith_waterman, tanimoto
45
from abydos.qgram import QGrams
46
47
from six.moves import range
48
49
# Directory that contains this test module.
TESTDIR = os.path.dirname(__file__)

# Spelling variants of the name "Niall".
# NOTE(review): presumably shared fixture data for tests later in this
# module -- confirm against the rest of the file.
NIALL = ('Niall', 'Neal', 'Neil', 'Njall', 'Njáll', 'Nigel', 'Neel', 'Nele',
         'Nigelli', 'Nel', 'Kneale', 'Uí Néill', 'O\'Neill', 'MacNeil',
         'MacNele', 'Niall Noígíallach')

# Spelling variants of the name "Colin" (same presumed purpose as NIALL).
COLIN = ('Colin', 'Collin', 'Cullen', 'Cuilen', 'Cailean', 'MacCailean',
         'Cuilén', 'Colle', 'Calum', 'Callum', 'Colinn', 'Colon', 'Colynn',
         'Col', 'Cole', 'Nicolas', 'Nicholas', 'Cailean Mór Caimbeul')
58
59
60
class LevenshteinTestCases(unittest.TestCase):
    """Test Levenshtein functions.

    abydos.distance.levenshtein, .dist_levenshtein, .sim_levenshtein,
    .damerau_levenshtein, .dist_damerau, & .sim_damerau

    Throughout these tests the ``cost`` tuple is exercised in the order
    (insert, delete, substitute, transpose), as the per-operation sections
    of test_levenshtein demonstrate.
    """

    def test_levenshtein(self):
        """Test abydos.distance.levenshtein.

        Covers the default mode and the explicit 'lev', 'osa' and 'dam'
        modes, custom operation costs, and the ValueError raised by 'dam'
        for an unsupported cost tuple.
        """
        self.assertEqual(levenshtein('', ''), 0)

        # http://oldfashionedsoftware.com/tag/levenshtein-distance/
        self.assertEqual(levenshtein('a', ''), 1)
        self.assertEqual(levenshtein('', 'a'), 1)
        self.assertEqual(levenshtein('abc', ''), 3)
        self.assertEqual(levenshtein('', 'abc'), 3)
        self.assertEqual(levenshtein('', ''), 0)
        self.assertEqual(levenshtein('a', 'a'), 0)
        self.assertEqual(levenshtein('abc', 'abc'), 0)
        self.assertEqual(levenshtein('', 'a'), 1)
        self.assertEqual(levenshtein('a', 'ab'), 1)
        self.assertEqual(levenshtein('b', 'ab'), 1)
        self.assertEqual(levenshtein('ac', 'abc'), 1)
        self.assertEqual(levenshtein('abcdefg', 'xabxcdxxefxgx'), 6)
        self.assertEqual(levenshtein('a', ''), 1)
        self.assertEqual(levenshtein('ab', 'a'), 1)
        self.assertEqual(levenshtein('ab', 'b'), 1)
        self.assertEqual(levenshtein('abc', 'ac'), 1)
        self.assertEqual(levenshtein('xabxcdxxefxgx', 'abcdefg'), 6)
        self.assertEqual(levenshtein('a', 'b'), 1)
        self.assertEqual(levenshtein('ab', 'ac'), 1)
        self.assertEqual(levenshtein('ac', 'bc'), 1)
        self.assertEqual(levenshtein('abc', 'axc'), 1)
        self.assertEqual(levenshtein('xabxcdxxefxgx', '1ab2cd34ef5g6'), 6)
        self.assertEqual(levenshtein('example', 'samples'), 3)
        self.assertEqual(levenshtein('sturgeon', 'urgently'), 6)
        self.assertEqual(levenshtein('levenshtein', 'frankenstein'), 6)
        self.assertEqual(levenshtein('distance', 'difference'), 5)
        self.assertEqual(levenshtein('java was neat', 'scala is great'), 7)

        # https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance
        self.assertEqual(levenshtein('CA', 'ABC', 'dam'), 2)
        self.assertEqual(levenshtein('CA', 'ABC', 'osa'), 3)

        # test cost of insert
        self.assertEqual(levenshtein('', 'b', 'lev', cost=(5, 7, 10, 10)), 5)
        self.assertEqual(levenshtein('', 'b', 'osa', cost=(5, 7, 10, 10)), 5)
        self.assertEqual(levenshtein('', 'b', 'dam', cost=(5, 7, 10, 10)), 5)
        self.assertEqual(levenshtein('a', 'ab', 'lev', cost=(5, 7, 10, 10)), 5)
        self.assertEqual(levenshtein('a', 'ab', 'osa', cost=(5, 7, 10, 10)), 5)
        self.assertEqual(levenshtein('a', 'ab', 'dam', cost=(5, 7, 10, 10)), 5)

        # test cost of delete
        self.assertEqual(levenshtein('b', '', 'lev', cost=(5, 7, 10, 10)), 7)
        self.assertEqual(levenshtein('b', '', 'osa', cost=(5, 7, 10, 10)), 7)
        self.assertEqual(levenshtein('b', '', 'dam', cost=(5, 7, 10, 10)), 7)
        self.assertEqual(levenshtein('ab', 'a', 'lev', cost=(5, 7, 10, 10)), 7)
        self.assertEqual(levenshtein('ab', 'a', 'osa', cost=(5, 7, 10, 10)), 7)
        self.assertEqual(levenshtein('ab', 'a', 'dam', cost=(5, 7, 10, 10)), 7)

        # test cost of substitute
        self.assertEqual(levenshtein('a', 'b', 'lev', cost=(10, 10, 5, 10)), 5)
        self.assertEqual(levenshtein('a', 'b', 'osa', cost=(10, 10, 5, 10)), 5)
        self.assertEqual(levenshtein('a', 'b', 'dam', cost=(10, 10, 5, 10)), 5)
        self.assertEqual(levenshtein('ac', 'bc', 'lev',
                                     cost=(10, 10, 5, 10)), 5)
        self.assertEqual(levenshtein('ac', 'bc', 'osa',
                                     cost=(10, 10, 5, 10)), 5)
        self.assertEqual(levenshtein('ac', 'bc', 'dam',
                                     cost=(10, 10, 5, 10)), 5)

        # test cost of transpose
        # ('lev' mode is expected to ignore the cheap transposition cost and
        # report 20, whereas 'osa' and 'dam' report the transposition cost 5)
        self.assertEqual(levenshtein('ab', 'ba', 'lev',
                                     cost=(10, 10, 10, 5)), 20)
        self.assertEqual(levenshtein('ab', 'ba', 'osa',
                                     cost=(10, 10, 10, 5)), 5)
        self.assertEqual(levenshtein('ab', 'ba', 'dam',
                                     cost=(5, 5, 10, 5)), 5)
        self.assertEqual(levenshtein('abc', 'bac', 'lev',
                                     cost=(10, 10, 10, 5)), 20)
        self.assertEqual(levenshtein('abc', 'bac', 'osa',
                                     cost=(10, 10, 10, 5)), 5)
        self.assertEqual(levenshtein('abc', 'bac', 'dam',
                                     cost=(5, 5, 10, 5)), 5)
        self.assertEqual(levenshtein('cab', 'cba', 'lev',
                                     cost=(10, 10, 10, 5)), 20)
        self.assertEqual(levenshtein('cab', 'cba', 'osa',
                                     cost=(10, 10, 10, 5)), 5)
        self.assertEqual(levenshtein('cab', 'cba', 'dam',
                                     cost=(5, 5, 10, 5)), 5)

        # test exception
        # NOTE(review): presumably 'dam' rejects a transposition cost that is
        # too cheap relative to insert + delete -- confirm against the
        # implementation.
        self.assertRaises(ValueError, levenshtein, 'ab', 'ba', 'dam',
                          cost=(10, 10, 10, 5))

    def test_dist_levenshtein(self):
        """Test abydos.distance.dist_levenshtein.

        Identical strings are expected to give 0, wholly different strings
        1, and partial matches a fraction in between.
        """
        self.assertEqual(dist_levenshtein('', ''), 0)

        self.assertEqual(dist_levenshtein('a', 'a'), 0)
        self.assertEqual(dist_levenshtein('ab', 'ab'), 0)
        self.assertEqual(dist_levenshtein('', 'a'), 1)
        self.assertEqual(dist_levenshtein('', 'ab'), 1)
        self.assertEqual(dist_levenshtein('a', 'c'), 1)

        self.assertAlmostEqual(dist_levenshtein('abc', 'ac'), 1/3)
        self.assertAlmostEqual(dist_levenshtein('abbc', 'ac'), 1/2)
        self.assertAlmostEqual(dist_levenshtein('abbc', 'abc'), 1/4)

    def test_sim_levenshtein(self):
        """Test abydos.distance.sim_levenshtein.

        Expected values are the complements (1 - distance) of those in
        test_dist_levenshtein.
        """
        self.assertEqual(sim_levenshtein('', ''), 1)

        self.assertEqual(sim_levenshtein('a', 'a'), 1)
        self.assertEqual(sim_levenshtein('ab', 'ab'), 1)
        self.assertEqual(sim_levenshtein('', 'a'), 0)
        self.assertEqual(sim_levenshtein('', 'ab'), 0)
        self.assertEqual(sim_levenshtein('a', 'c'), 0)

        self.assertAlmostEqual(sim_levenshtein('abc', 'ac'), 2/3)
        self.assertAlmostEqual(sim_levenshtein('abbc', 'ac'), 1/2)
        self.assertAlmostEqual(sim_levenshtein('abbc', 'abc'), 3/4)

    def test_damerau_levenshtein(self):
        """Test abydos.distance.damerau_levenshtein.

        The expected values match the 'dam' mode assertions in
        test_levenshtein, including the ValueError case.
        """
        self.assertEqual(damerau_levenshtein('', ''), 0)
        self.assertEqual(damerau_levenshtein('CA', 'CA'), 0)
        self.assertEqual(damerau_levenshtein('CA', 'ABC'), 2)
        self.assertEqual(damerau_levenshtein('', 'b', cost=(5, 7, 10, 10)), 5)
        self.assertEqual(damerau_levenshtein('a', 'ab', cost=(5, 7, 10, 10)),
                         5)
        self.assertEqual(damerau_levenshtein('b', '', cost=(5, 7, 10, 10)), 7)
        self.assertEqual(damerau_levenshtein('ab', 'a', cost=(5, 7, 10, 10)),
                         7)
        self.assertEqual(damerau_levenshtein('a', 'b', cost=(10, 10, 5, 10)),
                         5)
        self.assertEqual(damerau_levenshtein('ac', 'bc',
                                             cost=(10, 10, 5, 10)), 5)
        self.assertEqual(damerau_levenshtein('ab', 'ba',
                                             cost=(5, 5, 10, 5)), 5)
        self.assertEqual(damerau_levenshtein('abc', 'bac',
                                             cost=(5, 5, 10, 5)), 5)
        self.assertEqual(damerau_levenshtein('cab', 'cba',
                                             cost=(5, 5, 10, 5)), 5)
        self.assertRaises(ValueError, damerau_levenshtein, 'ab', 'ba',
                          cost=(10, 10, 10, 5))

    def test_dist_damerau(self):
        """Test abydos.distance.dist_damerau.

        Checks the normalized distance with default and custom costs, and
        that the unsupported cost tuple raises ValueError.
        """
        self.assertEqual(dist_damerau('', ''), 0)

        self.assertEqual(dist_damerau('a', 'a'), 0)
        self.assertEqual(dist_damerau('ab', 'ab'), 0)
        self.assertEqual(dist_damerau('', 'a'), 1)
        self.assertEqual(dist_damerau('', 'ab'), 1)
        self.assertEqual(dist_damerau('a', 'c'), 1)

        self.assertAlmostEqual(dist_damerau('abc', 'ac'), 1/3)
        self.assertAlmostEqual(dist_damerau('abbc', 'ac'), 1/2)
        self.assertAlmostEqual(dist_damerau('abbc', 'abc'), 1/4)

        self.assertAlmostEqual(dist_damerau('CA', 'ABC'), 2/3)
        self.assertAlmostEqual(dist_damerau('', 'b', cost=(5, 7, 10, 10)), 1)
        self.assertAlmostEqual(dist_damerau('a', 'ab',
                                            cost=(5, 7, 10, 10)), 1/2)
        self.assertAlmostEqual(dist_damerau('b', '', cost=(5, 7, 10, 10)), 1)
        self.assertAlmostEqual(dist_damerau('ab', 'a',
                                            cost=(5, 7, 10, 10)), 1/2)
        self.assertAlmostEqual(dist_damerau('a', 'b',
                                            cost=(10, 10, 5, 10)), 1/2)
        self.assertAlmostEqual(dist_damerau('ac', 'bc',
                                            cost=(10, 10, 5, 10)), 1/4)
        self.assertAlmostEqual(dist_damerau('ab', 'ba',
                                            cost=(5, 5, 10, 5)), 1/2)
        self.assertAlmostEqual(dist_damerau('abc', 'bac',
                                            cost=(5, 5, 10, 5)), 1/3)
        self.assertAlmostEqual(dist_damerau('cab', 'cba',
                                            cost=(5, 5, 10, 5)), 1/3)
        self.assertRaises(ValueError, dist_damerau, 'ab', 'ba',
                          cost=(10, 10, 10, 5))

    def test_sim_damerau(self):
        """Test abydos.distance.sim_damerau.

        Expected values are the complements (1 - distance) of those in
        test_dist_damerau; the ValueError case is checked as well.
        """
        self.assertEqual(sim_damerau('', ''), 1)

        self.assertEqual(sim_damerau('a', 'a'), 1)
        self.assertEqual(sim_damerau('ab', 'ab'), 1)
        self.assertEqual(sim_damerau('', 'a'), 0)
        self.assertEqual(sim_damerau('', 'ab'), 0)
        self.assertEqual(sim_damerau('a', 'c'), 0)

        self.assertAlmostEqual(sim_damerau('abc', 'ac'), 2/3)
        self.assertAlmostEqual(sim_damerau('abbc', 'ac'), 1/2)
        self.assertAlmostEqual(sim_damerau('abbc', 'abc'), 3/4)

        self.assertAlmostEqual(sim_damerau('CA', 'ABC'), 1/3)
        self.assertAlmostEqual(sim_damerau('', 'b', cost=(5, 7, 10, 10)), 0)
        self.assertAlmostEqual(sim_damerau('a', 'ab', cost=(5, 7, 10, 10)),
                               1/2)
        self.assertAlmostEqual(sim_damerau('b', '', cost=(5, 7, 10, 10)), 0)
        self.assertAlmostEqual(sim_damerau('ab', 'a', cost=(5, 7, 10, 10)),
                               1/2)
        self.assertAlmostEqual(sim_damerau('a', 'b', cost=(10, 10, 5, 10)),
                               1/2)
        self.assertAlmostEqual(sim_damerau('ac', 'bc',
                                           cost=(10, 10, 5, 10)), 3/4)
        self.assertAlmostEqual(sim_damerau('ab', 'ba',
                                           cost=(5, 5, 10, 5)), 1/2)
        self.assertAlmostEqual(sim_damerau('abc', 'bac',
                                           cost=(5, 5, 10, 5)), 2/3)
        self.assertAlmostEqual(sim_damerau('cab', 'cba',
                                           cost=(5, 5, 10, 5)), 2/3)
        self.assertRaises(ValueError, sim_damerau, 'ab', 'ba',
                          cost=(10, 10, 10, 5))
274
275
276
class HammingTestCases(unittest.TestCase):
    """Test Hamming functions.

    abydos.distance.hamming, .dist_hamming, & .sim_hamming

    In these tests a third positional argument of False makes strings of
    unequal length raise ValueError; without it, unequal lengths are
    accepted and contribute to the distance.
    """

    def test_hamming(self):
        """Test abydos.distance.hamming.

        Checks raw distances for equal- and unequal-length strings, the
        ValueError for unequal lengths in strict mode, and the Wikipedia
        examples.
        """
        self.assertEqual(hamming('', ''), 0)
        self.assertEqual(hamming('', '', False), 0)

        self.assertEqual(hamming('a', ''), 1)
        self.assertEqual(hamming('a', 'a'), 0)
        self.assertEqual(hamming('a', 'a', False), 0)
        self.assertEqual(hamming('a', 'b'), 1)
        self.assertEqual(hamming('a', 'b', False), 1)
        self.assertEqual(hamming('abc', 'cba'), 2)
        self.assertEqual(hamming('abc', 'cba', False), 2)
        self.assertEqual(hamming('abc', ''), 3)
        self.assertEqual(hamming('bb', 'cbab'), 3)

        # test exception
        self.assertRaises(ValueError, hamming, 'ab', 'a', False)

        # https://en.wikipedia.org/wiki/Hamming_distance
        self.assertEqual(hamming('karolin', 'kathrin'), 3)
        self.assertEqual(hamming('karolin', 'kerstin'), 3)
        self.assertEqual(hamming('1011101', '1001001'), 2)
        self.assertEqual(hamming('2173896', '2233796'), 3)

    def test_dist_hamming(self):
        """Test abydos.distance.dist_hamming.

        Same inputs as test_hamming, with the expected distance expressed
        as a fraction between 0 and 1.
        """
        self.assertEqual(dist_hamming('', ''), 0)
        self.assertEqual(dist_hamming('', '', False), 0)

        self.assertEqual(dist_hamming('a', ''), 1)
        self.assertEqual(dist_hamming('a', 'a'), 0)
        self.assertEqual(dist_hamming('a', 'a', False), 0)
        self.assertEqual(dist_hamming('a', 'b'), 1)
        self.assertEqual(dist_hamming('a', 'b', False), 1)
        self.assertAlmostEqual(dist_hamming('abc', 'cba'), 2/3)
        self.assertAlmostEqual(dist_hamming('abc', 'cba', False), 2/3)
        self.assertEqual(dist_hamming('abc', ''), 1)
        self.assertAlmostEqual(dist_hamming('bb', 'cbab'), 3/4)

        # test exception
        self.assertRaises(ValueError, dist_hamming, 'ab', 'a', False)

        # https://en.wikipedia.org/wiki/Hamming_distance
        self.assertAlmostEqual(dist_hamming('karolin', 'kathrin'), 3/7)
        self.assertAlmostEqual(dist_hamming('karolin', 'kerstin'), 3/7)
        self.assertAlmostEqual(dist_hamming('1011101', '1001001'), 2/7)
        self.assertAlmostEqual(dist_hamming('2173896', '2233796'), 3/7)

    def test_sim_hamming(self):
        """Test abydos.distance.sim_hamming.

        Expected values are the complements (1 - distance) of those in
        test_dist_hamming.
        """
        self.assertEqual(sim_hamming('', ''), 1)
        self.assertEqual(sim_hamming('', '', False), 1)

        self.assertEqual(sim_hamming('a', ''), 0)
        self.assertEqual(sim_hamming('a', 'a'), 1)
        self.assertEqual(sim_hamming('a', 'a', False), 1)
        self.assertEqual(sim_hamming('a', 'b'), 0)
        self.assertEqual(sim_hamming('a', 'b', False), 0)
        self.assertAlmostEqual(sim_hamming('abc', 'cba'), 1/3)
        self.assertAlmostEqual(sim_hamming('abc', 'cba', False), 1/3)
        self.assertEqual(sim_hamming('abc', ''), 0)
        self.assertAlmostEqual(sim_hamming('bb', 'cbab'), 1/4)

        # test exception
        self.assertRaises(ValueError, sim_hamming, 'ab', 'a', False)

        # https://en.wikipedia.org/wiki/Hamming_distance
        self.assertAlmostEqual(sim_hamming('karolin', 'kathrin'), 4/7)
        self.assertAlmostEqual(sim_hamming('karolin', 'kerstin'), 4/7)
        self.assertAlmostEqual(sim_hamming('1011101', '1001001'), 5/7)
        self.assertAlmostEqual(sim_hamming('2173896', '2233796'), 4/7)
353
354
355
# Sentence fixtures for the "non-q-gram" assertions, where the q-gram size
# argument is passed as None.
NONQ_FROM = 'The quick brown fox jumped over the lazy dog.'
NONQ_TO = 'That brown dog jumped over the fox.'
357
358
359
class TverskyIndexTestCases(unittest.TestCase):
    """Test Tversky functions.

    abydos.distance.sim_tversky & .dist_tversky

    NOTE(review): the positional arguments after the two inputs appear to be
    the q-gram size, alpha, beta and bias, in that order (inferred from the
    section comments below) -- confirm against the function signatures.
    """

    def test_sim_tversky(self):
        """Test abydos.distance.sim_tversky.

        Covers default and explicit q-gram size, invalid (negative) alpha and
        beta, an oversized q value, unequal weights, the bias parameter,
        pre-built QGrams inputs, and whole-sentence input with q set to None.
        """
        self.assertEqual(sim_tversky('', ''), 1)
        self.assertEqual(sim_tversky('nelson', ''), 0)
        self.assertEqual(sim_tversky('', 'neilsen'), 0)
        self.assertAlmostEqual(sim_tversky('nelson', 'neilsen'), 4/11)

        self.assertEqual(sim_tversky('', '', 2), 1)
        self.assertEqual(sim_tversky('nelson', '', 2), 0)
        self.assertEqual(sim_tversky('', 'neilsen', 2), 0)
        self.assertAlmostEqual(sim_tversky('nelson', 'neilsen', 2), 4/11)

        # test valid alpha & beta
        self.assertRaises(ValueError, sim_tversky, 'abcd', 'dcba', 2, -1, -1)
        self.assertRaises(ValueError, sim_tversky, 'abcd', 'dcba', 2, -1, 0)
        self.assertRaises(ValueError, sim_tversky, 'abcd', 'dcba', 2, 0, -1)

        # test empty QGrams
        self.assertAlmostEqual(sim_tversky('nelson', 'neilsen', 7), 0.0)

        # test unequal alpha & beta
        self.assertAlmostEqual(sim_tversky('niall', 'neal', 2, 2, 1), 3/11)
        self.assertAlmostEqual(sim_tversky('niall', 'neal', 2, 1, 2), 3/10)
        self.assertAlmostEqual(sim_tversky('niall', 'neal', 2, 2, 2), 3/13)

        # test bias parameter
        self.assertAlmostEqual(sim_tversky('niall', 'neal', 2, 1, 1, 0.5),
                               7/11)
        self.assertAlmostEqual(sim_tversky('niall', 'neal', 2, 2, 1, 0.5), 7/9)
        self.assertAlmostEqual(sim_tversky('niall', 'neal', 2, 1, 2, 0.5),
                               7/15)
        self.assertAlmostEqual(sim_tversky('niall', 'neal', 2, 2, 2, 0.5),
                               7/11)

        # supplied q-gram tests
        self.assertEqual(sim_tversky(QGrams(''), QGrams('')), 1)
        self.assertEqual(sim_tversky(QGrams('nelson'), QGrams('')), 0)
        self.assertEqual(sim_tversky(QGrams(''), QGrams('neilsen')), 0)
        self.assertAlmostEqual(sim_tversky(QGrams('nelson'),
                                           QGrams('neilsen')), 4/11)

        # non-q-gram tests
        self.assertEqual(sim_tversky('', '', None), 1)
        self.assertEqual(sim_tversky('the quick', '', None), 0)
        self.assertEqual(sim_tversky('', 'the quick', None), 0)
        self.assertAlmostEqual(sim_tversky(NONQ_FROM, NONQ_TO, None), 1/3)
        self.assertAlmostEqual(sim_tversky(NONQ_TO, NONQ_FROM, None), 1/3)

    def test_dist_tversky(self):
        """Test abydos.distance.dist_tversky.

        Mirrors test_sim_tversky; each expected value is the complement
        (1 - similarity) of the corresponding similarity assertion.
        """
        self.assertEqual(dist_tversky('', ''), 0)
        self.assertEqual(dist_tversky('nelson', ''), 1)
        self.assertEqual(dist_tversky('', 'neilsen'), 1)
        self.assertAlmostEqual(dist_tversky('nelson', 'neilsen'), 7/11)

        self.assertEqual(dist_tversky('', '', 2), 0)
        self.assertEqual(dist_tversky('nelson', '', 2), 1)
        self.assertEqual(dist_tversky('', 'neilsen', 2), 1)
        self.assertAlmostEqual(dist_tversky('nelson', 'neilsen', 2), 7/11)

        # test valid alpha & beta
        self.assertRaises(ValueError, dist_tversky, 'abcd', 'dcba', 2, -1, -1)
        self.assertRaises(ValueError, dist_tversky, 'abcd', 'dcba', 2, -1, 0)
        self.assertRaises(ValueError, dist_tversky, 'abcd', 'dcba', 2, 0, -1)

        # test empty QGrams
        self.assertAlmostEqual(dist_tversky('nelson', 'neilsen', 7), 1.0)

        # test unequal alpha & beta
        self.assertAlmostEqual(dist_tversky('niall', 'neal', 2, 2, 1), 8/11)
        self.assertAlmostEqual(dist_tversky('niall', 'neal', 2, 1, 2), 7/10)
        self.assertAlmostEqual(dist_tversky('niall', 'neal', 2, 2, 2), 10/13)

        # test bias parameter
        self.assertAlmostEqual(dist_tversky('niall', 'neal', 2, 1, 1, 0.5),
                               4/11)
        self.assertAlmostEqual(dist_tversky('niall', 'neal', 2, 2, 1, 0.5),
                               2/9)
        self.assertAlmostEqual(dist_tversky('niall', 'neal', 2, 1, 2, 0.5),
                               8/15)
        self.assertAlmostEqual(dist_tversky('niall', 'neal', 2, 2, 2, 0.5),
                               4/11)

        # supplied q-gram tests
        self.assertEqual(dist_tversky(QGrams(''), QGrams('')), 0)
        self.assertEqual(dist_tversky(QGrams('nelson'), QGrams('')), 1)
        self.assertEqual(dist_tversky(QGrams(''), QGrams('neilsen')), 1)
        self.assertAlmostEqual(dist_tversky(QGrams('nelson'),
                                            QGrams('neilsen')), 7/11)

        # non-q-gram tests
        self.assertEqual(dist_tversky('', '', None), 0)
        self.assertEqual(dist_tversky('the quick', '', None), 1)
        self.assertEqual(dist_tversky('', 'the quick', None), 1)
        self.assertAlmostEqual(dist_tversky(NONQ_FROM, NONQ_TO, None), 2/3)
        self.assertAlmostEqual(dist_tversky(NONQ_TO, NONQ_FROM, None), 2/3)
461
462
463
class DiceTestCases(unittest.TestCase):
    """Test Dice functions.

    abydos.distance.sim_dice & .dist_dice
    """

    def test_sim_dice(self):
        """Test abydos.distance.sim_dice.

        Covers default and explicit q-gram size, pre-built QGrams inputs,
        and whole-sentence input with the q argument set to None.
        """
        self.assertEqual(sim_dice('', ''), 1)
        self.assertEqual(sim_dice('nelson', ''), 0)
        self.assertEqual(sim_dice('', 'neilsen'), 0)
        self.assertAlmostEqual(sim_dice('nelson', 'neilsen'), 8/15)

        self.assertEqual(sim_dice('', '', 2), 1)
        self.assertEqual(sim_dice('nelson', '', 2), 0)
        self.assertEqual(sim_dice('', 'neilsen', 2), 0)
        self.assertAlmostEqual(sim_dice('nelson', 'neilsen', 2), 8/15)

        # supplied q-gram tests
        self.assertEqual(sim_dice(QGrams(''), QGrams('')), 1)
        self.assertEqual(sim_dice(QGrams('nelson'), QGrams('')), 0)
        self.assertEqual(sim_dice(QGrams(''), QGrams('neilsen')), 0)
        self.assertAlmostEqual(sim_dice(QGrams('nelson'), QGrams('neilsen')),
                               8/15)

        # non-q-gram tests
        self.assertEqual(sim_dice('', '', None), 1)
        self.assertEqual(sim_dice('the quick', '', None), 0)
        self.assertEqual(sim_dice('', 'the quick', None), 0)
        self.assertAlmostEqual(sim_dice(NONQ_FROM, NONQ_TO, None), 1/2)
        self.assertAlmostEqual(sim_dice(NONQ_TO, NONQ_FROM, None), 1/2)

    def test_dist_dice(self):
        """Test abydos.distance.dist_dice.

        Mirrors test_sim_dice; each expected value is the complement
        (1 - similarity) of the corresponding similarity assertion.
        """
        self.assertEqual(dist_dice('', ''), 0)
        self.assertEqual(dist_dice('nelson', ''), 1)
        self.assertEqual(dist_dice('', 'neilsen'), 1)
        self.assertAlmostEqual(dist_dice('nelson', 'neilsen'), 7/15)

        self.assertEqual(dist_dice('', '', 2), 0)
        self.assertEqual(dist_dice('nelson', '', 2), 1)
        self.assertEqual(dist_dice('', 'neilsen', 2), 1)
        self.assertAlmostEqual(dist_dice('nelson', 'neilsen', 2), 7/15)

        # supplied q-gram tests
        self.assertEqual(dist_dice(QGrams(''), QGrams('')), 0)
        self.assertEqual(dist_dice(QGrams('nelson'), QGrams('')), 1)
        self.assertEqual(dist_dice(QGrams(''), QGrams('neilsen')), 1)
        self.assertAlmostEqual(dist_dice(QGrams('nelson'), QGrams('neilsen')),
                               7/15)

        # non-q-gram tests
        self.assertEqual(dist_dice('', '', None), 0)
        self.assertEqual(dist_dice('the quick', '', None), 1)
        self.assertEqual(dist_dice('', 'the quick', None), 1)
        self.assertAlmostEqual(dist_dice(NONQ_FROM, NONQ_TO, None), 1/2)
        self.assertAlmostEqual(dist_dice(NONQ_TO, NONQ_FROM, None), 1/2)
520
521
522
class JaccardTestCases(unittest.TestCase):
    """Test Jaccard functions.

    abydos.distance.sim_jaccard & .dist_jaccard
    """

    def test_sim_jaccard(self):
        """Test abydos.distance.sim_jaccard.

        Covers default and explicit q-gram size, pre-built QGrams inputs,
        and whole-sentence input with the q argument set to None.
        """
        self.assertEqual(sim_jaccard('', ''), 1)
        self.assertEqual(sim_jaccard('nelson', ''), 0)
        self.assertEqual(sim_jaccard('', 'neilsen'), 0)
        self.assertAlmostEqual(sim_jaccard('nelson', 'neilsen'), 4/11)

        self.assertEqual(sim_jaccard('', '', 2), 1)
        self.assertEqual(sim_jaccard('nelson', '', 2), 0)
        self.assertEqual(sim_jaccard('', 'neilsen', 2), 0)
        self.assertAlmostEqual(sim_jaccard('nelson', 'neilsen', 2), 4/11)

        # supplied q-gram tests
        self.assertEqual(sim_jaccard(QGrams(''), QGrams('')), 1)
        self.assertEqual(sim_jaccard(QGrams('nelson'), QGrams('')), 0)
        self.assertEqual(sim_jaccard(QGrams(''), QGrams('neilsen')), 0)
        self.assertAlmostEqual(sim_jaccard(QGrams('nelson'),
                                           QGrams('neilsen')), 4/11)

        # non-q-gram tests
        self.assertEqual(sim_jaccard('', '', None), 1)
        self.assertEqual(sim_jaccard('the quick', '', None), 0)
        self.assertEqual(sim_jaccard('', 'the quick', None), 0)
        self.assertAlmostEqual(sim_jaccard(NONQ_FROM, NONQ_TO, None), 1/3)
        self.assertAlmostEqual(sim_jaccard(NONQ_TO, NONQ_FROM, None), 1/3)

    def test_dist_jaccard(self):
        """Test abydos.distance.dist_jaccard.

        Mirrors test_sim_jaccard; each expected value is the complement
        (1 - similarity) of the corresponding similarity assertion.
        """
        self.assertEqual(dist_jaccard('', ''), 0)
        self.assertEqual(dist_jaccard('nelson', ''), 1)
        self.assertEqual(dist_jaccard('', 'neilsen'), 1)
        self.assertAlmostEqual(dist_jaccard('nelson', 'neilsen'), 7/11)

        self.assertEqual(dist_jaccard('', '', 2), 0)
        self.assertEqual(dist_jaccard('nelson', '', 2), 1)
        self.assertEqual(dist_jaccard('', 'neilsen', 2), 1)
        self.assertAlmostEqual(dist_jaccard('nelson', 'neilsen', 2), 7/11)

        # supplied q-gram tests
        self.assertEqual(dist_jaccard(QGrams(''), QGrams('')), 0)
        self.assertEqual(dist_jaccard(QGrams('nelson'), QGrams('')), 1)
        self.assertEqual(dist_jaccard(QGrams(''), QGrams('neilsen')), 1)
        self.assertAlmostEqual(dist_jaccard(QGrams('nelson'),
                                            QGrams('neilsen')), 7/11)

        # non-q-gram tests
        self.assertEqual(dist_jaccard('', '', None), 0)
        self.assertEqual(dist_jaccard('the quick', '', None), 1)
        self.assertEqual(dist_jaccard('', 'the quick', None), 1)
        self.assertAlmostEqual(dist_jaccard(NONQ_FROM, NONQ_TO, None), 2/3)
        self.assertAlmostEqual(dist_jaccard(NONQ_TO, NONQ_FROM, None), 2/3)
579
580
581
class OverlapTestCases(unittest.TestCase):
    """Test overlap functions.

    abydos.distance.sim_overlap & .dist_overlap
    """

    def test_sim_overlap(self):
        """Test abydos.distance.sim_overlap.

        Covers default and explicit q-gram size, pre-built QGrams inputs,
        and whole-sentence input with the q argument set to None.
        """
        self.assertEqual(sim_overlap('', ''), 1)
        self.assertEqual(sim_overlap('nelson', ''), 0)
        self.assertEqual(sim_overlap('', 'neilsen'), 0)
        self.assertAlmostEqual(sim_overlap('nelson', 'neilsen'), 4/7)

        self.assertEqual(sim_overlap('', '', 2), 1)
        self.assertEqual(sim_overlap('nelson', '', 2), 0)
        self.assertEqual(sim_overlap('', 'neilsen', 2), 0)
        self.assertAlmostEqual(sim_overlap('nelson', 'neilsen', 2), 4/7)

        # supplied q-gram tests
        self.assertEqual(sim_overlap(QGrams(''), QGrams('')), 1)
        self.assertEqual(sim_overlap(QGrams('nelson'), QGrams('')), 0)
        self.assertEqual(sim_overlap(QGrams(''), QGrams('neilsen')), 0)
        self.assertAlmostEqual(sim_overlap(QGrams('nelson'),
                                           QGrams('neilsen')), 4/7)

        # non-q-gram tests
        self.assertEqual(sim_overlap('', '', None), 1)
        self.assertEqual(sim_overlap('the quick', '', None), 0)
        self.assertEqual(sim_overlap('', 'the quick', None), 0)
        self.assertAlmostEqual(sim_overlap(NONQ_FROM, NONQ_TO, None), 4/7)
        self.assertAlmostEqual(sim_overlap(NONQ_TO, NONQ_FROM, None), 4/7)

    def test_dist_overlap(self):
        """Test abydos.distance.dist_overlap.

        Mirrors test_sim_overlap; each expected value is the complement
        (1 - similarity) of the corresponding similarity assertion.
        """
        self.assertEqual(dist_overlap('', ''), 0)
        self.assertEqual(dist_overlap('nelson', ''), 1)
        self.assertEqual(dist_overlap('', 'neilsen'), 1)
        self.assertAlmostEqual(dist_overlap('nelson', 'neilsen'), 3/7)

        self.assertEqual(dist_overlap('', '', 2), 0)
        self.assertEqual(dist_overlap('nelson', '', 2), 1)
        self.assertEqual(dist_overlap('', 'neilsen', 2), 1)
        self.assertAlmostEqual(dist_overlap('nelson', 'neilsen', 2), 3/7)

        # supplied q-gram tests
        self.assertEqual(dist_overlap(QGrams(''), QGrams('')), 0)
        self.assertEqual(dist_overlap(QGrams('nelson'), QGrams('')), 1)
        self.assertEqual(dist_overlap(QGrams(''), QGrams('neilsen')), 1)
        self.assertAlmostEqual(dist_overlap(QGrams('nelson'),
                                            QGrams('neilsen')), 3/7)

        # non-q-gram tests
        self.assertEqual(dist_overlap('', '', None), 0)
        self.assertEqual(dist_overlap('the quick', '', None), 1)
        self.assertEqual(dist_overlap('', 'the quick', None), 1)
        self.assertAlmostEqual(dist_overlap(NONQ_FROM, NONQ_TO, None), 3/7)
        self.assertAlmostEqual(dist_overlap(NONQ_TO, NONQ_FROM, None), 3/7)
638
639
640
class TanimotoTestCases(unittest.TestCase):
    """Test Tanimoto functions.

    abydos.distance.sim_tanimoto & .tanimoto
    """

    def test_tanimoto_coeff(self):
        """Test abydos.distance.sim_tanimoto.

        Checks plain strings (two-argument calls and calls passing 2 as the
        third argument), QGrams inputs, and calls passing None as the third
        argument.
        """
        eq = self.assertEqual
        near = self.assertAlmostEqual

        # plain strings: the two-argument form first, then the same checks
        # with 2 supplied as the third argument
        for extra in ((), (2,)):
            eq(sim_tanimoto('', '', *extra), 1)
            eq(sim_tanimoto('nelson', '', *extra), 0)
            eq(sim_tanimoto('', 'neilsen', *extra), 0)
            near(sim_tanimoto('nelson', 'neilsen', *extra), 4/11)

        # supplied q-gram tests
        eq(sim_tanimoto(QGrams(''), QGrams('')), 1)
        eq(sim_tanimoto(QGrams('nelson'), QGrams('')), 0)
        eq(sim_tanimoto(QGrams(''), QGrams('neilsen')), 0)
        near(sim_tanimoto(QGrams('nelson'), QGrams('neilsen')), 4/11)

        # non-q-gram tests
        eq(sim_tanimoto('', '', None), 1)
        eq(sim_tanimoto('the quick', '', None), 0)
        eq(sim_tanimoto('', 'the quick', None), 0)
        # same expectation with the two fixtures in either order
        for src, tar in ((NONQ_FROM, NONQ_TO), (NONQ_TO, NONQ_FROM)):
            near(sim_tanimoto(src, tar, None), 1/3)

    def test_tanimoto(self):
        """Test abydos.distance.tanimoto.

        The expected values are base-2 logarithms of the ratios used in
        test_tanimoto_coeff; the cases with exactly one empty input are
        expected to give negative infinity.
        """
        eq = self.assertEqual
        near = self.assertAlmostEqual
        neg_inf = float('-inf')
        log_names = math.log(4/11, 2)
        log_nonq = math.log(1/3, 2)

        # plain strings: the two-argument form first, then the same checks
        # with 2 supplied as the third argument
        for extra in ((), (2,)):
            eq(tanimoto('', '', *extra), 0)
            eq(tanimoto('nelson', '', *extra), neg_inf)
            eq(tanimoto('', 'neilsen', *extra), neg_inf)
            near(tanimoto('nelson', 'neilsen', *extra), log_names)

        # supplied q-gram tests
        eq(tanimoto(QGrams(''), QGrams('')), 0)
        eq(tanimoto(QGrams('nelson'), QGrams('')), neg_inf)
        eq(tanimoto(QGrams(''), QGrams('neilsen')), neg_inf)
        near(tanimoto(QGrams('nelson'), QGrams('neilsen')), log_names)

        # non-q-gram tests
        eq(tanimoto('', '', None), 0)
        eq(tanimoto('the quick', '', None), neg_inf)
        eq(tanimoto('', 'the quick', None), neg_inf)
        # same expectation with the two fixtures in either order
        for src, tar in ((NONQ_FROM, NONQ_TO), (NONQ_TO, NONQ_FROM)):
            near(tanimoto(src, tar, None), log_nonq)
702
703
704
class CosineSimilarityTestCases(unittest.TestCase):
    """Test cosine similarity functions.

    abydos.distance.sim_cosine & .dist_cosine
    """

    def test_sim_cosine(self):
        """Test abydos.distance.sim_cosine.

        Checks plain strings (two-argument calls and calls passing 2 as the
        third argument), QGrams inputs, and calls passing None as the third
        argument.
        """
        eq = self.assertEqual
        near = self.assertAlmostEqual
        sim_names = 4/math.sqrt(7*8)  # expected for 'nelson' vs 'neilsen'
        sim_nonq = 4/math.sqrt(9*7)  # expected for the NONQ_* fixtures

        # plain strings: the two-argument form first, then the same checks
        # with 2 supplied as the third argument
        for extra in ((), (2,)):
            eq(sim_cosine('', '', *extra), 1)
            eq(sim_cosine('nelson', '', *extra), 0)
            eq(sim_cosine('', 'neilsen', *extra), 0)
            near(sim_cosine('nelson', 'neilsen', *extra), sim_names)

        # supplied q-gram tests
        eq(sim_cosine(QGrams(''), QGrams('')), 1)
        eq(sim_cosine(QGrams('nelson'), QGrams('')), 0)
        eq(sim_cosine(QGrams(''), QGrams('neilsen')), 0)
        near(sim_cosine(QGrams('nelson'), QGrams('neilsen')), sim_names)

        # non-q-gram tests
        eq(sim_cosine('', '', None), 1)
        eq(sim_cosine('the quick', '', None), 0)
        eq(sim_cosine('', 'the quick', None), 0)
        # same expectation with the two fixtures in either order
        for src, tar in ((NONQ_FROM, NONQ_TO), (NONQ_TO, NONQ_FROM)):
            near(sim_cosine(src, tar, None), sim_nonq)

    def test_dist_cosine(self):
        """Test abydos.distance.dist_cosine.

        Mirrors test_sim_cosine; each expected value is one minus the
        corresponding similarity expectation.
        """
        eq = self.assertEqual
        near = self.assertAlmostEqual
        dist_names = 1-(4/math.sqrt(7*8))  # 'nelson' vs 'neilsen'
        dist_nonq = 1-4/math.sqrt(9*7)  # the NONQ_* fixtures

        # plain strings: the two-argument form first, then the same checks
        # with 2 supplied as the third argument
        for extra in ((), (2,)):
            eq(dist_cosine('', '', *extra), 0)
            eq(dist_cosine('nelson', '', *extra), 1)
            eq(dist_cosine('', 'neilsen', *extra), 1)
            near(dist_cosine('nelson', 'neilsen', *extra), dist_names)

        # supplied q-gram tests
        eq(dist_cosine(QGrams(''), QGrams('')), 0)
        eq(dist_cosine(QGrams('nelson'), QGrams('')), 1)
        eq(dist_cosine(QGrams(''), QGrams('neilsen')), 1)
        near(dist_cosine(QGrams('nelson'), QGrams('neilsen')), dist_names)

        # non-q-gram tests
        eq(dist_cosine('', '', None), 0)
        eq(dist_cosine('the quick', '', None), 1)
        eq(dist_cosine('', 'the quick', None), 1)
        # same expectation with the two fixtures in either order
        for src, tar in ((NONQ_FROM, NONQ_TO), (NONQ_TO, NONQ_FROM)):
            near(dist_cosine(src, tar, None), dist_nonq)
770
771
772
class JaroWinklerTestCases(unittest.TestCase):
    """Test Jaro(-Winkler) functions.

    abydos.distance.sim_strcmp95, .dist_strcmp95, .sim_jaro_winkler, &
    .dist_jaro_winkler
    """

    def test_sim_strcmp95(self):
        """Test abydos.distance.sim_strcmp95.

        Covers empty and identical inputs, three name pairs, a pair with no
        characters in common, and the same name pairs with True passed as
        the third argument.
        """
        eq = self.assertEqual
        near = self.assertAlmostEqual

        # empty and identical inputs
        eq(sim_strcmp95('', ''), 1)
        eq(sim_strcmp95('MARTHA', ''), 0)
        eq(sim_strcmp95('', 'MARTHA'), 0)
        eq(sim_strcmp95('MARTHA', 'MARTHA'), 1)

        near(sim_strcmp95('MARTHA', 'MARHTA'), 0.96111111)
        near(sim_strcmp95('DWAYNE', 'DUANE'), 0.873)
        near(sim_strcmp95('DIXON', 'DICKSONX'), 0.839333333)

        # no characters in common
        near(sim_strcmp95('ABCD', 'EFGH'), 0.0)

        # long_strings = True
        near(sim_strcmp95('DIXON', 'DICKSONX', True), 0.85393939)
        near(sim_strcmp95('DWAYNE', 'DUANE', True), 0.89609090)
        near(sim_strcmp95('MARTHA', 'MARHTA', True), 0.97083333)

    def test_dist_strcmp95(self):
        """Test abydos.distance.dist_strcmp95.

        Covers empty and identical inputs, three name pairs, and a pair with
        no characters in common.
        """
        eq = self.assertEqual
        near = self.assertAlmostEqual

        # empty and identical inputs
        eq(dist_strcmp95('', ''), 0)
        eq(dist_strcmp95('MARTHA', ''), 1)
        eq(dist_strcmp95('', 'MARTHA'), 1)
        eq(dist_strcmp95('MARTHA', 'MARTHA'), 0)

        near(dist_strcmp95('MARTHA', 'MARHTA'), 0.03888888)
        near(dist_strcmp95('DWAYNE', 'DUANE'), 0.127)
        near(dist_strcmp95('DIXON', 'DICKSONX'), 0.160666666)

        # no characters in common
        near(dist_strcmp95('ABCD', 'EFGH'), 1.0)

    def test_sim_jaro_winkler(self):
        """Test abydos.distance.sim_jaro_winkler.

        Each basic case is checked in both 'jaro' and 'winkler' modes;
        out-of-range boost_threshold/scaling_factor values must raise
        ValueError; long_strings=True is checked last.
        """
        eq = self.assertEqual
        near = self.assertAlmostEqual

        # empty and identical inputs, in both modes
        trivial = (
            ('', '', 1),
            ('MARTHA', '', 0),
            ('', 'MARHTA', 0),
            ('MARTHA', 'MARTHA', 1),
        )
        for src, tar, expected in trivial:
            for mode in ('jaro', 'winkler'):
                eq(sim_jaro_winkler(src, tar, mode=mode), expected)

        # https://en.wikipedia.org/wiki/Jaro-Winkler_distance
        name_pairs = (
            ('MARTHA', 'MARHTA', 0.94444444, 0.96111111),
            ('DWAYNE', 'DUANE', 0.82222222, 0.84),
            ('DIXON', 'DICKSONX', 0.76666666, 0.81333333),
        )
        for src, tar, jaro, winkler in name_pairs:
            near(sim_jaro_winkler(src, tar, mode='jaro'), jaro)
            near(sim_jaro_winkler(src, tar, mode='winkler'), winkler)

        # parameter values that must be rejected
        rejected = (
            dict(boost_threshold=2),
            dict(boost_threshold=-1),
            dict(scaling_factor=0.3),
            dict(scaling_factor=-1),
        )
        for bad_kwargs in rejected:
            self.assertRaises(ValueError, sim_jaro_winkler, 'abcd', 'dcba',
                              **bad_kwargs)

        near(sim_jaro_winkler('ABCD', 'EFGH'), 0.0)

        # long_strings = True (applies only to Jaro-Winkler, not Jaro)
        eq(sim_jaro_winkler('ABCD', 'EFGH', long_strings=True),
           sim_jaro_winkler('ABCD', 'EFGH'))
        eq(sim_jaro_winkler('DIXON', 'DICKSONX', mode='jaro',
                            long_strings=True),
           sim_jaro_winkler('DIXON', 'DICKSONX', mode='jaro'))
        near(sim_jaro_winkler('DIXON', 'DICKSONX', mode='winkler',
                              long_strings=True),
             0.83030303)
        near(sim_jaro_winkler('MARTHA', 'MARHTA', mode='winkler',
                              long_strings=True),
             0.97083333)

    def test_dist_jaro_winkler(self):
        """Test abydos.distance.dist_jaro_winkler.

        Each basic case is checked in both 'jaro' and 'winkler' modes;
        out-of-range boost_threshold/scaling_factor values must raise
        ValueError.
        """
        eq = self.assertEqual
        near = self.assertAlmostEqual

        # empty and identical inputs, in both modes
        trivial = (
            ('', '', 0),
            ('MARTHA', '', 1),
            ('', 'MARHTA', 1),
            ('MARTHA', 'MARTHA', 0),
        )
        for src, tar, expected in trivial:
            for mode in ('jaro', 'winkler'):
                eq(dist_jaro_winkler(src, tar, mode=mode), expected)

        # https://en.wikipedia.org/wiki/Jaro-Winkler_distance
        name_pairs = (
            ('MARTHA', 'MARHTA', 0.05555555, 0.03888888),
            ('DWAYNE', 'DUANE', 0.17777777, 0.16),
            ('DIXON', 'DICKSONX', 0.23333333, 0.18666666),
        )
        for src, tar, jaro, winkler in name_pairs:
            near(dist_jaro_winkler(src, tar, mode='jaro'), jaro)
            near(dist_jaro_winkler(src, tar, mode='winkler'), winkler)

        # parameter values that must be rejected
        rejected = (
            dict(boost_threshold=2),
            dict(boost_threshold=-1),
            dict(scaling_factor=0.3),
            dict(scaling_factor=-1),
        )
        for bad_kwargs in rejected:
            self.assertRaises(ValueError, dist_jaro_winkler, 'abcd', 'dcba',
                              **bad_kwargs)

        near(dist_jaro_winkler('ABCD', 'EFGH'), 1.0)
900
901
902
class LcsseqTestCases(unittest.TestCase):
    """Test LCSseq functions.

    abydos.distance.lcsseq, .sim_lcsseq, & .dist_lcsseq
    """

    def test_lcsseq(self):
        """Test abydos.distance.lcsseq.

        Each table row is (src, tar, expected subsequence).
        """
        cases = (
            ('', '', ''),
            ('A', '', ''),
            ('', 'A', ''),
            ('A', 'A', 'A'),
            ('ABCD', '', ''),
            ('', 'ABCD', ''),
            ('ABCD', 'ABCD', 'ABCD'),
            ('ABCD', 'BC', 'BC'),
            ('ABCD', 'AD', 'AD'),
            ('ABCD', 'AC', 'AC'),
            ('AB', 'CD', ''),
            ('ABC', 'BCD', 'BC'),

            ('DIXON', 'DICKSONX', 'DION'),

            # https://en.wikipedia.org/wiki/Longest_common_subsequence_problem
            ('AGCAT', 'GAC', 'AC'),
            ('XMJYAUZ', 'MZJAWXU', 'MJAU'),

            # https://github.com/jwmerrill/factor/blob/master/basis/lcs/lcs-tests.factor
            ('hell', 'hello', 'hell'),
            ('hello', 'hell', 'hell'),
            ('ell', 'hell', 'ell'),
            ('hell', 'ell', 'ell'),
            ('faxbcd', 'abdef', 'abd'),

            # http://www.unesco.org/culture/languages-atlas/assets/_core/php/qcubed_unit_tests.php
            ('hello world', 'world war 2', 'world'),
            ('foo bar', 'bar foo', 'foo'),
            ('aaa', 'aa', 'aa'),
            ('cc', 'bbbbcccccc', 'cc'),
            ('ccc', 'bcbb', 'c'),
        )
        for src, tar, expected in cases:
            self.assertEqual(lcsseq(src, tar), expected)

    def test_sim_lcsseq(self):
        """Test abydos.distance.sim_lcsseq.

        Rows are (src, tar, expected similarity); the first table is
        compared exactly, the second approximately.
        """
        exact_cases = (
            ('', '', 1),
            ('A', '', 0),
            ('', 'A', 0),
            ('A', 'A', 1),
            ('ABCD', '', 0),
            ('', 'ABCD', 0),
            ('ABCD', 'ABCD', 1),
        )
        for src, tar, expected in exact_cases:
            self.assertEqual(sim_lcsseq(src, tar), expected)

        approx_cases = (
            ('ABCD', 'BC', 2/4),
            ('ABCD', 'AD', 2/4),
            ('ABCD', 'AC', 2/4),
            ('AB', 'CD', 0),
            ('ABC', 'BCD', 2/3),

            ('DIXON', 'DICKSONX', 4/8),

            # https://en.wikipedia.org/wiki/Longest_common_subsequence_problem
            ('AGCAT', 'GAC', 2/5),
            ('XMJYAUZ', 'MZJAWXU', 4/7),

            # https://github.com/jwmerrill/factor/blob/master/basis/lcs/lcs-tests.factor
            ('hell', 'hello', 4/5),
            ('hello', 'hell', 4/5),
            ('ell', 'hell', 3/4),
            ('hell', 'ell', 3/4),
            ('faxbcd', 'abdef', 3/6),

            # http://www.unesco.org/culture/languages-atlas/assets/_core/php/qcubed_unit_tests.php
            ('hello world', 'world war 2', 5/11),
            ('foo bar', 'bar foo', 3/7),
            ('aaa', 'aa', 2/3),
            ('cc', 'bbbbcccccc', 2/10),
            ('ccc', 'bcbb', 1/4),
        )
        for src, tar, expected in approx_cases:
            self.assertAlmostEqual(sim_lcsseq(src, tar), expected)

    def test_dist_lcsseq(self):
        """Test abydos.distance.dist_lcsseq.

        Rows are (src, tar, expected distance); the first table is compared
        exactly, the second approximately.
        """
        exact_cases = (
            ('', '', 0),
            ('A', '', 1),
            ('', 'A', 1),
            ('A', 'A', 0),
            ('ABCD', '', 1),
            ('', 'ABCD', 1),
            ('ABCD', 'ABCD', 0),
        )
        for src, tar, expected in exact_cases:
            self.assertEqual(dist_lcsseq(src, tar), expected)

        approx_cases = (
            ('ABCD', 'BC', 2/4),
            ('ABCD', 'AD', 2/4),
            ('ABCD', 'AC', 2/4),
            ('AB', 'CD', 1),
            ('ABC', 'BCD', 1/3),

            ('DIXON', 'DICKSONX', 4/8),

            # https://en.wikipedia.org/wiki/Longest_common_subsequence_problem
            ('AGCAT', 'GAC', 3/5),
            ('XMJYAUZ', 'MZJAWXU', 3/7),

            # https://github.com/jwmerrill/factor/blob/master/basis/lcs/lcs-tests.factor
            ('hell', 'hello', 1/5),
            ('hello', 'hell', 1/5),
            ('ell', 'hell', 1/4),
            ('hell', 'ell', 1/4),
            ('faxbcd', 'abdef', 3/6),

            # http://www.unesco.org/culture/languages-atlas/assets/_core/php/qcubed_unit_tests.php
            ('hello world', 'world war 2', 6/11),
            ('foo bar', 'bar foo', 4/7),
            ('aaa', 'aa', 1/3),
            ('cc', 'bbbbcccccc', 8/10),
            ('ccc', 'bcbb', 3/4),
        )
        for src, tar, expected in approx_cases:
            self.assertAlmostEqual(dist_lcsseq(src, tar), expected)
1012
1013
1014
class LcsstrTestCases(unittest.TestCase):
    """Test LCSstr functions.

    abydos.distance.lcsstr, .sim_lcsstr, & .dist_lcsstr
    """

    def test_lcsstr(self):
        """Test abydos.distance.lcsstr.

        Each table row is (src, tar, expected substring); a final check
        uses two long nucleotide strings.
        """
        cases = (
            ('', '', ''),
            ('A', '', ''),
            ('', 'A', ''),
            ('A', 'A', 'A'),
            ('ABCD', '', ''),
            ('', 'ABCD', ''),
            ('ABCD', 'ABCD', 'ABCD'),
            ('ABCD', 'BC', 'BC'),
            ('ABCD', 'AD', 'A'),
            ('ABCD', 'AC', 'A'),
            ('AB', 'CD', ''),
            ('ABC', 'BCD', 'BC'),

            ('DIXON', 'DICKSONX', 'DI'),

            # https://en.wikipedia.org/wiki/Longest_common_subsequence_problem
            ('AGCAT', 'GAC', 'A'),
            ('XMJYAUZ', 'MZJAWXU', 'X'),

            # https://github.com/jwmerrill/factor/blob/master/basis/lcs/lcs-tests.factor
            ('hell', 'hello', 'hell'),
            ('hello', 'hell', 'hell'),
            ('ell', 'hell', 'ell'),
            ('hell', 'ell', 'ell'),
            ('faxbcd', 'abdef', 'f'),

            # http://www.unesco.org/culture/languages-atlas/assets/_core/php/qcubed_unit_tests.php
            ('hello world', 'world war 2', 'world'),
            ('foo bar', 'bar foo', 'foo'),
            ('aaa', 'aa', 'aa'),
            ('cc', 'bbbbcccccc', 'cc'),
            ('ccc', 'bcbb', 'c'),

            # http://www.maplesoft.com/support/help/Maple/view.aspx?path=StringTools/LongestCommonSubString
            ('abax', 'bax', 'bax'),
            ('tsaxbaxyz', 'axcaxy', 'axy'),
            ('abcde', 'uvabxycde', 'cde'),
            ('abc', 'xyz', ''),
        )
        for src, tar, expected in cases:
            self.assertEqual(lcsstr(src, tar), expected)

        # long inputs, assembled from adjacent literals to keep lines short
        long_src = ('TAAGGTCGGCGCGCACGCTGGCGAGTATGGTGCGGAGGCCCTGGAG'
                    'AGGTGAGGCTCCCTCCCCTGCTCCGACCCGGGCTCCTCGCCCGCCCGGACCCAC')
        long_tar = ('AAGCGCCGCGCAGTCTGGGCT'
                    'CCGCACACTTCTGGTCCAGTCCGACTGAGAAGGAACCACCATGGTGCTGTCTCC'
                    'CGCTGACAAGACCAACATCAAGACT'
                    'GCCTGGGAAAAGATCGGCAGCCACGGTGGCGAGTATGGCGCCGAGGCCGT')
        self.assertEqual(lcsstr(long_src, long_tar), 'TGGCGAGTATGG')

    def test_sim_lcsstr(self):
        """Test abydos.distance.sim_lcsstr.

        Rows are (src, tar, expected similarity); the first table is
        compared exactly, the second approximately.
        """
        exact_cases = (
            ('', '', 1),
            ('A', '', 0),
            ('', 'A', 0),
            ('A', 'A', 1),
            ('ABCD', '', 0),
            ('', 'ABCD', 0),
            ('ABCD', 'ABCD', 1),
        )
        for src, tar, expected in exact_cases:
            self.assertEqual(sim_lcsstr(src, tar), expected)

        approx_cases = (
            ('ABCD', 'BC', 2/4),
            ('ABCD', 'AD', 1/4),
            ('ABCD', 'AC', 1/4),
            ('AB', 'CD', 0),
            ('ABC', 'BCD', 2/3),

            ('DIXON', 'DICKSONX', 2/8),

            # https://en.wikipedia.org/wiki/Longest_common_subsequence_problem
            ('AGCAT', 'GAC', 1/5),
            ('XMJYAUZ', 'MZJAWXU', 1/7),

            # https://github.com/jwmerrill/factor/blob/master/basis/lcs/lcs-tests.factor
            ('hell', 'hello', 4/5),
            ('hello', 'hell', 4/5),
            ('ell', 'hell', 3/4),
            ('hell', 'ell', 3/4),
            ('faxbcd', 'abdef', 1/6),

            # http://www.unesco.org/culture/languages-atlas/assets/_core/php/qcubed_unit_tests.php
            ('hello world', 'world war 2', 5/11),
            ('foo bar', 'bar foo', 3/7),
            ('aaa', 'aa', 2/3),
            ('cc', 'bbbbcccccc', 2/10),
            ('ccc', 'bcbb', 1/4),
        )
        for src, tar, expected in approx_cases:
            self.assertAlmostEqual(sim_lcsstr(src, tar), expected)

    def test_dist_lcsstr(self):
        """Test abydos.distance.dist_lcsstr.

        Rows are (src, tar, expected distance); the first table is compared
        exactly, the second approximately.
        """
        exact_cases = (
            ('', '', 0),
            ('A', '', 1),
            ('', 'A', 1),
            ('A', 'A', 0),
            ('ABCD', '', 1),
            ('', 'ABCD', 1),
            ('ABCD', 'ABCD', 0),
        )
        for src, tar, expected in exact_cases:
            self.assertEqual(dist_lcsstr(src, tar), expected)

        approx_cases = (
            ('ABCD', 'BC', 2/4),
            ('ABCD', 'AD', 3/4),
            ('ABCD', 'AC', 3/4),
            ('AB', 'CD', 1),
            ('ABC', 'BCD', 1/3),

            ('DIXON', 'DICKSONX', 6/8),

            # https://en.wikipedia.org/wiki/Longest_common_subsequence_problem
            ('AGCAT', 'GAC', 4/5),
            ('XMJYAUZ', 'MZJAWXU', 6/7),

            # https://github.com/jwmerrill/factor/blob/master/basis/lcs/lcs-tests.factor
            ('hell', 'hello', 1/5),
            ('hello', 'hell', 1/5),
            ('ell', 'hell', 1/4),
            ('hell', 'ell', 1/4),
            ('faxbcd', 'abdef', 5/6),

            # http://www.unesco.org/culture/languages-atlas/assets/_core/php/qcubed_unit_tests.php
            ('hello world', 'world war 2', 6/11),
            ('foo bar', 'bar foo', 4/7),
            ('aaa', 'aa', 1/3),
            ('cc', 'bbbbcccccc', 8/10),
            ('ccc', 'bcbb', 3/4),
        )
        for src, tar, expected in approx_cases:
            self.assertAlmostEqual(dist_lcsstr(src, tar), expected)
1134
1135
1136
class RatcliffObershelpTestCases(unittest.TestCase):
    """Test Ratcliff-Obershelp functions.

    abydos.distance.sim_ratcliff_obershelp, &
    abydos.distance.dist_ratcliff_obershelp
    """

    def _assert_matches_difflib(self, word1, word2):
        """Assert sim_ratcliff_obershelp agrees with difflib for a pair.

        The reference value is SequenceMatcher(None, word1, word2).ratio();
        the comparison is approximate (assertAlmostEqual).
        """
        self.assertAlmostEqual(sim_ratcliff_obershelp(word1, word2),
                               SequenceMatcher(None, word1, word2).ratio())

    def test_sim_ratcliff_obershelp(self):
        """Test abydos.distance.sim_ratcliff_obershelp.

        Checks fixed examples first, then compares against difflib for
        every word pair read from two corpus files under TESTDIR.
        """
        # https://github.com/rockymadden/stringmetric/blob/master/core/src/test/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetricSpec.scala
        self.assertEqual(sim_ratcliff_obershelp('', ''), 1)
        self.assertEqual(sim_ratcliff_obershelp('abc', ''), 0)
        self.assertEqual(sim_ratcliff_obershelp('', 'xyz'), 0)
        self.assertEqual(sim_ratcliff_obershelp('abc', 'abc'), 1)
        self.assertEqual(sim_ratcliff_obershelp('123', '123'), 1)
        self.assertEqual(sim_ratcliff_obershelp('abc', 'xyz'), 0)
        self.assertEqual(sim_ratcliff_obershelp('123', '456'), 0)
        self.assertAlmostEqual(sim_ratcliff_obershelp('aleksander',
                                                      'alexandre'),
                               0.7368421052631579)
        self.assertAlmostEqual(sim_ratcliff_obershelp('alexandre',
                                                      'aleksander'),
                               0.7368421052631579)
        self.assertAlmostEqual(sim_ratcliff_obershelp('pennsylvania',
                                                      'pencilvaneya'),
                               0.6666666666666666)
        self.assertAlmostEqual(sim_ratcliff_obershelp('pencilvaneya',
                                                      'pennsylvania'),
                               0.6666666666666666)
        self.assertAlmostEqual(sim_ratcliff_obershelp('abcefglmn', 'abefglmo'),
                               0.8235294117647058)
        self.assertAlmostEqual(sim_ratcliff_obershelp('abefglmo', 'abcefglmn'),
                               0.8235294117647058)

        with open(TESTDIR+'/corpora/variantNames.csv') as cav_testset:
            next(cav_testset)  # skip the first line (presumably a header)
            for line in cav_testset:
                fields = line.strip().split(',')
                # compare the first and fifth comma-separated fields
                self._assert_matches_difflib(fields[0], fields[4])

        # Characters kept from each misspelling line: capital letters plus
        # the comma separating the two words. Built once, not per character.
        kept_chars = frozenset('ABCDEFGHIJKLMNOPQRSTUVWXYZ,')
        with open(TESTDIR+'/corpora/wikipediaCommonMisspellings.csv') as missp:
            next(missp)  # skip the first line (presumably a header)
            for line in missp:
                line = line.strip().upper()
                line = ''.join([char for char in line if char in kept_chars])
                word1, word2 = line.split(',')
                self._assert_matches_difflib(word1, word2)

    def test_dist_ratcliff_obershelp(self):
        """Test abydos.distance.dist_ratcliff_obershelp.

        Uses the same fixed examples as the similarity test, with expected
        values equal to one minus the similarity expectations.
        """
        # https://github.com/rockymadden/stringmetric/blob/master/core/src/test/scala/com/rockymadden/stringmetric/similarity/RatcliffObershelpMetricSpec.scala
        self.assertEqual(dist_ratcliff_obershelp('', ''), 0)
        self.assertEqual(dist_ratcliff_obershelp('abc', ''), 1)
        self.assertEqual(dist_ratcliff_obershelp('', 'xyz'), 1)
        self.assertEqual(dist_ratcliff_obershelp('abc', 'abc'), 0)
        self.assertEqual(dist_ratcliff_obershelp('123', '123'), 0)
        self.assertEqual(dist_ratcliff_obershelp('abc', 'xyz'), 1)
        self.assertEqual(dist_ratcliff_obershelp('123', '456'), 1)
        self.assertAlmostEqual(dist_ratcliff_obershelp('aleksander',
                                                       'alexandre'),
                               0.2631578947368421)
        self.assertAlmostEqual(dist_ratcliff_obershelp('alexandre',
                                                       'aleksander'),
                               0.2631578947368421)
        self.assertAlmostEqual(dist_ratcliff_obershelp('pennsylvania',
                                                       'pencilvaneya'),
                               0.3333333333333333)
        self.assertAlmostEqual(dist_ratcliff_obershelp('pencilvaneya',
                                                       'pennsylvania'),
                               0.3333333333333333)
        self.assertAlmostEqual(dist_ratcliff_obershelp('abcefglmn',
                                                       'abefglmo'),
                               0.1764705882352941)
        self.assertAlmostEqual(dist_ratcliff_obershelp('abefglmo',
                                                       'abcefglmn'),
                               0.1764705882352941)
1219
1220
1221
class MraTestCases(unittest.TestCase):
    """Test MRA functions.

    abydos.distance.mra_compare, .sim_mra & .dist_mra
    """

    def test_mra_compare(self):
        """Test abydos.distance.mra_compare."""
        cases = (
            ('', '', 6),
            ('a', 'a', 6),
            ('abcdefg', 'abcdefg', 6),
            ('abcdefg', '', 0),
            ('', 'abcdefg', 0),
            # https://en.wikipedia.org/wiki/Match_rating_approach
            ('Byrne', 'Boern', 5),
            ('Smith', 'Smyth', 5),
            ('Catherine', 'Kathryn', 4),
            # further hand-picked pairs
            ('ab', 'abcdefgh', 0),
            ('ab', 'ac', 5),
            ('abcdefik', 'abcdefgh', 3),
            ('xyz', 'abc', 0),
        )
        for src, tar, score in cases:
            self.assertEqual(mra_compare(src, tar), score)

    def test_sim_mra(self):
        """Test abydos.distance.sim_mra."""
        cases = (
            ('', '', 1),
            ('a', 'a', 1),
            ('abcdefg', 'abcdefg', 1),
            ('abcdefg', '', 0),
            ('', 'abcdefg', 0),
            # https://en.wikipedia.org/wiki/Match_rating_approach
            ('Byrne', 'Boern', 5/6),
            ('Smith', 'Smyth', 5/6),
            ('Catherine', 'Kathryn', 4/6),
            # further hand-picked pairs
            ('ab', 'abcdefgh', 0),
            ('ab', 'ac', 5/6),
            ('abcdefik', 'abcdefgh', 3/6),
            ('xyz', 'abc', 0),
        )
        for src, tar, similarity in cases:
            self.assertEqual(sim_mra(src, tar), similarity)

    def test_dist_mra(self):
        """Test abydos.distance.dist_mra."""
        exact = self.assertEqual
        approx = self.assertAlmostEqual
        cases = (
            (exact, '', '', 0),
            (exact, 'a', 'a', 0),
            (exact, 'abcdefg', 'abcdefg', 0),
            (exact, 'abcdefg', '', 1),
            (exact, '', 'abcdefg', 1),
            # https://en.wikipedia.org/wiki/Match_rating_approach
            (approx, 'Byrne', 'Boern', 1/6),
            (approx, 'Smith', 'Smyth', 1/6),
            (approx, 'Catherine', 'Kathryn', 2/6),
            # further hand-picked pairs
            (exact, 'ab', 'abcdefgh', 1),
            (approx, 'ab', 'ac', 1/6),
            (approx, 'abcdefik', 'abcdefgh', 3/6),
            (exact, 'xyz', 'abc', 1),
        )
        for check, src, tar, distance in cases:
            check(dist_mra(src, tar), distance)
1280
1281
1282
class CompressionTestCases(unittest.TestCase):
    """Test compression distance functions.

    abydos.distance.dist_compression & .sim_compression
    """

    # built once by ac_train from the space-joined NIALL names and shared by
    # every test that passes an explicit dictionary to the 'arith' compressor
    arith_dict = ac_train(' '.join(NIALL))

    def test_dist_compression(self):
        """Test abydos.distance.dist_compression."""
        # extra positional arguments appended after (src, tar); the empty
        # tuple exercises the default compressor
        before_dict = ((), ('bzip2',), ('lzma',), ('zlib',), ('arith',))
        with_dict = (('arith', self.arith_dict),)
        rle_modes = (('rle',), ('bwtrle',))
        every_mode = before_dict + with_dict + rle_modes

        for mode in every_mode:
            self.assertEqual(dist_compression('', '', *mode), 0)

        for mode in every_mode:
            self.assertGreater(dist_compression('a', '', *mode), 0)

        # this pair is not checked with the explicit arithmetic dictionary
        for mode in before_dict + rle_modes:
            self.assertGreater(dist_compression('abcdefg', 'fg', *mode), 0)

    def test_dist_compression_arith(self):
        """Test abydos.distance.dist_compression (arithmetic compression)."""
        trained = ('arith', self.arith_dict)
        untrained = ('arith',)
        cases = (
            ('Niall', 'Neil', trained, 0.608695652173913),
            ('Niall', 'Neil', untrained, 0.6875),
            ('Njáll', 'Njall', trained, 0.714285714285714),
            ('Njáll', 'Njall', untrained, 0.75),
        )
        # every pair is checked in both argument orders
        for src, tar, mode, expected in cases:
            self.assertAlmostEqual(dist_compression(src, tar, *mode),
                                   expected)
            self.assertAlmostEqual(dist_compression(tar, src, *mode),
                                   expected)

    def test_dist_compression_rle(self):
        """Test abydos.distance.dist_compression (RLE & BWT+RLE)."""
        cases = (
            ('abc', 'abc', 'rle', 0),
            ('abc', 'def', 'rle', 1),
            ('abc', 'abc', 'bwtrle', 0),
            ('abc', 'def', 'bwtrle', 0.75),
            ('aaa', 'bbaaa', 'rle', 0.5),
            ('abb', 'bbba', 'rle', 1/3),
            ('banana', 'banane', 'bwtrle', 0.57142857142),
            ('bananas', 'bananen', 'bwtrle', 0.5),
        )
        for src, tar, compressor, expected in cases:
            self.assertAlmostEqual(dist_compression(src, tar, compressor),
                                   expected)

    def test_sim_compression(self):
        """Test abydos.distance.sim_compression."""
        # extra positional arguments appended after (src, tar); the empty
        # tuple exercises the default compressor
        before_dict = ((), ('bzip2',), ('lzma',), ('zlib',), ('arith',))
        with_dict = (('arith', self.arith_dict),)
        rle_modes = (('rle',), ('bwtrle',))
        every_mode = before_dict + with_dict + rle_modes

        for mode in every_mode:
            self.assertEqual(sim_compression('', '', *mode), 1)

        for mode in every_mode:
            self.assertLess(sim_compression('a', '', *mode), 1)

        # this pair is not checked with the explicit arithmetic dictionary
        for mode in before_dict + rle_modes:
            self.assertLess(sim_compression('abcdefg', 'fg', *mode), 1)

    def test_sim_compression_arith(self):
        """Test abydos.distance.sim_compression (arithmetic compression)."""
        trained = ('arith', self.arith_dict)
        untrained = ('arith',)
        cases = (
            ('Niall', 'Neil', trained, 0.3913043478260869),
            ('Niall', 'Neil', untrained, 0.3125),
            ('Njáll', 'Njall', trained, 0.285714285714285),
            ('Njáll', 'Njall', untrained, 0.25),
        )
        # every pair is checked in both argument orders
        for src, tar, mode, expected in cases:
            self.assertAlmostEqual(sim_compression(src, tar, *mode),
                                   expected)
            self.assertAlmostEqual(sim_compression(tar, src, *mode),
                                   expected)

    def test_sim_compression_rle(self):
        """Test abydos.distance.sim_compression (RLE & BWT+RLE)."""
        cases = (
            ('abc', 'abc', 'rle', 1),
            ('abc', 'def', 'rle', 0),
            ('abc', 'abc', 'bwtrle', 1),
            ('abc', 'def', 'bwtrle', 0.25),
            ('aaa', 'bbaaa', 'rle', 0.5),
            ('abb', 'bbba', 'rle', 2/3),
            ('banana', 'banane', 'bwtrle', 0.42857142857),
            ('bananas', 'bananen', 'bwtrle', 0.5),
        )
        for src, tar, compressor, expected in cases:
            self.assertAlmostEqual(sim_compression(src, tar, compressor),
                                   expected)
1423
1424
1425
class MongeElkanTestCases(unittest.TestCase):
    """Test Monge-Elkan functions.

    abydos.distance.sim_monge_elkan & .dist_monge_elkan
    """

    def test_sim_monge_elkan(self):
        """Test abydos.distance.sim_monge_elkan."""
        exact = self.assertEqual
        approx = self.assertAlmostEqual

        for src, tar, expected in (('', '', 1), ('', 'a', 0), ('a', 'a', 1)):
            exact(sim_monge_elkan(src, tar), expected)

        # 'Niall' against variants, default (asymmetric) mode
        asymmetric = (
            (exact, 'Neal', 3/4),
            (exact, 'Njall', 5/6),
            (exact, 'Niel', 3/4),
            (exact, 'Nigel', 3/4),
        )
        for check, tar, expected in asymmetric:
            check(sim_monge_elkan('Niall', tar), expected)

        # the same variants with symmetric=True
        symmetric = (
            (exact, 'Neal', 31/40),
            (exact, 'Njall', 5/6),
            (exact, 'Niel', 31/40),
            (approx, 'Nigel', 17/24),
        )
        for check, tar, expected in symmetric:
            check(sim_monge_elkan('Niall', tar, symmetric=True), expected)

    def test_dist_monge_elkan(self):
        """Test abydos.distance.dist_monge_elkan."""
        exact = self.assertEqual
        approx = self.assertAlmostEqual

        for src, tar, expected in (('', '', 0), ('', 'a', 1)):
            exact(dist_monge_elkan(src, tar), expected)

        # 'Niall' against variants, default (asymmetric) mode
        asymmetric = (
            (exact, 'Neal', 1/4),
            (approx, 'Njall', 1/6),
            (exact, 'Niel', 1/4),
            (exact, 'Nigel', 1/4),
        )
        for check, tar, expected in asymmetric:
            check(dist_monge_elkan('Niall', tar), expected)

        # the same variants with symmetric=True
        symmetric = (
            ('Neal', 9/40),
            ('Njall', 1/6),
            ('Niel', 9/40),
            ('Nigel', 7/24),
        )
        for tar, expected in symmetric:
            approx(dist_monge_elkan('Niall', tar, symmetric=True), expected)
1469
1470
1471
class IdentityTestCases(unittest.TestCase):
    """Test identity similarity functions.

    abydos.distance.sim_ident & .dist_ident
    """

    def test_sim_ident(self):
        """Test abydos.distance.sim_ident."""
        cases = (
            ('', '', 1),
            ('', 'a', 0),
            ('a', '', 0),
            ('a', 'a', 1),
            ('abcd', 'abcd', 1),
            ('abcd', 'dcba', 0),
            ('abc', 'cba', 0),
        )
        for src, tar, similarity in cases:
            self.assertEqual(sim_ident(src, tar), similarity)

    def test_dist_ident(self):
        """Test abydos.distance.dist_ident."""
        cases = (
            ('', '', 0),
            ('', 'a', 1),
            ('a', '', 1),
            ('a', 'a', 0),
            ('abcd', 'abcd', 0),
            ('abcd', 'dcba', 1),
            ('abc', 'cba', 1),
        )
        for src, tar, distance in cases:
            self.assertEqual(dist_ident(src, tar), distance)
1496
1497
1498
def _sim_wikipedia(src, tar):
    """Score a pair of DNA bases with the Wikipedia example matrix.

    The scores are copied from the similarity matrix shown at:
    https://en.wikipedia.org/wiki/Needleman%E2%80%93Wunsch_algorithm

    The table is handed to sim_matrix together with symmetric=True and the
    alphabet 'CGAT'; whatever sim_matrix returns is returned unchanged.
    """
    # scores for identical bases
    scores = {
        ('A', 'A'): 10,
        ('G', 'G'): 7,
        ('C', 'C'): 9,
        ('T', 'T'): 8,
    }
    # scores for differing bases; each unordered pair is listed only once
    scores.update({
        ('A', 'G'): -1,
        ('A', 'C'): -3,
        ('A', 'T'): -4,
        ('G', 'C'): -5,
        ('G', 'T'): -3,
        ('C', 'T'): 0,
    })
    return sim_matrix(src, tar, scores, symmetric=True, alphabet='CGAT')
1508
1509
1510
def _sim_nw(src, tar):
1511
    """Return 1 if src is tar, otherwise -1."""
1512
    return 2*float(src is tar)-1
1513
1514
1515
class MatrixSimTestCases(unittest.TestCase):
    """Test matrix similarity functions.

    abydos.distance.sim_matrix
    """

    def test_sim_matrix(self):
        """Test abydos.distance.sim_matrix."""
        # sim_matrix called with only the two strings
        default_cases = (
            ('', '', 1),
            ('', 'a', 0),
            ('a', '', 0),
            ('a', 'a', 1),
            ('abcd', 'abcd', 1),
            ('abcd', 'dcba', 0),
            ('abc', 'cba', 0),
        )
        for src, tar, expected in default_cases:
            self.assertEqual(sim_matrix(src, tar), expected)

        # https://en.wikipedia.org/wiki/Needleman–Wunsch_algorithm
        wikipedia_cases = (
            ('A', 'C', -3),
            ('G', 'G', 7),
            ('A', 'A', 10),
            ('T', 'A', -4),
            ('T', 'C', 0),
            ('A', 'G', -1),
            ('C', 'T', 0),
        )
        for src, tar, expected in wikipedia_cases:
            self.assertEqual(_sim_wikipedia(src, tar), expected)

        # at least one argument contains 'c', which is not in alphabet 'ab'
        for src, tar in (('abc', 'cba'), ('abc', 'ba'), ('ab', 'cba')):
            self.assertRaises(ValueError, sim_matrix, src, tar, alphabet='ab')
1543
1544
1545
class NeedlemanWunschTestCases(unittest.TestCase):
    """Test Needleman-Wunsch functions.

    abydos.distance.needleman_wunsch
    """

    def test_needleman_wunsch(self):
        """Test abydos.distance.needleman_wunsch."""
        self.assertEqual(needleman_wunsch('', ''), 0)

        # each row: (src, tar, gap cost, similarity function, expected score)
        cases = (
            # https://en.wikipedia.org/wiki/Needleman–Wunsch_algorithm
            ('GATTACA', 'GCATGCU', 1, _sim_nw, 0),
            ('AGACTAGTTAC', 'CGAGACGT', 5, _sim_wikipedia, 16),
            # checked against http://ds9a.nl/nwunsch/
            # (mismatch=1, gap=5, skew=5)
            ('CGATATCAG', 'TGACGSTGC', 5, _sim_nw, -5),
            ('AGACTAGTTAC', 'TGACGSTGC', 5, _sim_nw, -7),
            ('AGACTAGTTAC', 'CGAGACGT', 5, _sim_nw, -15),
        )
        for src, tar, gap, sim_func, expected in cases:
            self.assertEqual(needleman_wunsch(src, tar, gap, sim_func),
                             expected)

    def test_needleman_wunsch_nialls(self):
        """Test abydos.distance.needleman_wunsch (Nialls set)."""
        # checked against http://ds9a.nl/nwunsch/ (mismatch=1, gap=2, skew=2)
        nw_vals = (5, 0, -2, 3, 1, 1, -2, -2, -1, -3, -3, -5, -3, -7, -7, -19)
        reference = NIALL[0]
        for idx, name in enumerate(NIALL):
            score = needleman_wunsch(reference, name, 2, _sim_nw)
            self.assertEqual(score, nw_vals[idx])
1576
1577
1578
class SmithWatermanTestCases(unittest.TestCase):
    """Test Smith-Waterman functions.

    abydos.distance.smith_waterman
    """

    def test_smith_waterman(self):
        """Test abydos.distance.smith_waterman."""
        self.assertEqual(smith_waterman('', ''), 0)

        # each row: (src, tar, gap cost, similarity function, expected score)
        cases = (
            # https://en.wikipedia.org/wiki/Needleman–Wunsch_algorithm
            ('GATTACA', 'GCATGCU', 1, _sim_nw, 0),
            ('AGACTAGTTAC', 'CGAGACGT', 5, _sim_wikipedia, 26),
            # the sequence pairs also used in the Needleman-Wunsch tests
            ('CGATATCAG', 'TGACGSTGC', 5, _sim_nw, 0),
            ('AGACTAGTTAC', 'TGACGSTGC', 5, _sim_nw, 1),
            ('AGACTAGTTAC', 'CGAGACGT', 5, _sim_nw, 0),
        )
        for src, tar, gap, sim_func, expected in cases:
            self.assertEqual(smith_waterman(src, tar, gap, sim_func),
                             expected)

    def test_smith_waterman_nialls(self):
        """Test abydos.distance.smith_waterman (Nialls set)."""
        sw_vals = (5, 1, 1, 3, 2, 1, 1, 0, 0, 1, 1, 2, 2, 1, 0, 0)
        reference = NIALL[0]
        for idx, name in enumerate(NIALL):
            score = smith_waterman(reference, name, 2, _sim_nw)
            self.assertEqual(score, sw_vals[idx])
1607
1608
1609
class GotohTestCases(unittest.TestCase):
    """Test Gotoh functions.

    abydos.distance.gotoh
    """

    def test_gotoh(self):
        """Test abydos.distance.gotoh.

        Each pair is scored twice: once with both gap arguments equal, which
        is compared to a fixed value, and once with a smaller second gap
        argument, which must score at least as high as needleman_wunsch.
        """
        self.assertEqual(gotoh('', ''), 0)

        # each row: (src, tar, gap, smaller second gap, sim function, score)
        cases = (
            # https://en.wikipedia.org/wiki/Needleman–Wunsch_algorithm
            ('GATTACA', 'GCATGCU', 1, 0.5, _sim_nw, 0),
            ('AGACTAGTTAC', 'CGAGACGT', 5, 2, _sim_wikipedia, 16),
            # checked against http://ds9a.nl/nwunsch/
            # (mismatch=1, gap=5, skew=5)
            ('CGATATCAG', 'TGACGSTGC', 5, 2, _sim_nw, -5),
            ('AGACTAGTTAC', 'TGACGSTGC', 5, 2, _sim_nw, -7),
            ('AGACTAGTTAC', 'CGAGACGT', 5, 2, _sim_nw, -15),
        )
        for src, tar, gap, smaller_gap, sim_func, expected in cases:
            self.assertEqual(gotoh(src, tar, gap, gap, sim_func), expected)
            self.assertGreaterEqual(
                gotoh(src, tar, gap, smaller_gap, sim_func),
                needleman_wunsch(src, tar, gap, sim_func))

    def test_gotoh_nialls(self):
        """Test abydos.distance.gotoh (Nialls set)."""
        reference = NIALL[0]

        # checked against http://ds9a.nl/nwunsch/ (mismatch=1, gap=2, skew=2)
        nw_vals = (5, 0, -2, 3, 1, 1, -2, -2, -1, -3, -3, -5, -3, -7, -7, -19)
        for idx, name in enumerate(NIALL):
            self.assertEqual(gotoh(reference, name, 2, 2, _sim_nw),
                             nw_vals[idx])

        # expected scores when the second gap argument is 1 instead of 2
        nw_vals2 = (5, 0, -2, 3, 1, 1, -2, -2, -1, -2, -3, -3, -2, -6, -6, -8)
        for idx, name in enumerate(NIALL):
            self.assertEqual(gotoh(reference, name, 2, 1, _sim_nw),
                             nw_vals2[idx])
            self.assertGreaterEqual(
                gotoh(reference, name, 2, 0.5, _sim_nw),
                needleman_wunsch(reference, name, 2, _sim_nw))
1661
1662
1663
class LengthTestCases(unittest.TestCase):
1664
    """Test length similarity functions.
1665
1666
    abydos.distance.sim_length & .dist_length
1667
    """
1668
1669
    def test_sim_ident(self):
1670
        """Test abydos.distance.sim_length."""
1671
        self.assertEqual(sim_length('', ''), 1)
1672
        self.assertEqual(sim_length('', 'a'), 0)
1673
        self.assertEqual(sim_length('a', ''), 0)
1674
        self.assertEqual(sim_length('a', 'a'), 1)
1675
        self.assertEqual(sim_length('abcd', 'abcd'), 1)
1676
        self.assertEqual(sim_length('abcd', 'dcba'), 1)
1677
        self.assertEqual(sim_length('abc', 'cba'), 1)
1678
        self.assertEqual(sim_length('abc', 'dcba'), 0.75)
1679
        self.assertEqual(sim_length('abcd', 'cba'), 0.75)
1680
        self.assertEqual(sim_length('ab', 'dcba'), 0.5)
1681
        self.assertEqual(sim_length('abcd', 'ba'), 0.5)
1682
1683
    def test_dist_ident(self):
1684
        """Test abydos.distance.dist_length."""
1685
        self.assertEqual(dist_length('', ''), 0)
1686
        self.assertEqual(dist_length('', 'a'), 1)
1687
        self.assertEqual(dist_length('a', ''), 1)
1688
        self.assertEqual(dist_length('a', 'a'), 0)
1689
        self.assertEqual(dist_length('abcd', 'abcd'), 0)
1690
        self.assertEqual(dist_length('abcd', 'dcba'), 0)
1691
        self.assertEqual(dist_length('abc', 'cba'), 0)
1692
        self.assertEqual(dist_length('abc', 'dcba'), 0.25)
1693
        self.assertEqual(dist_length('abcd', 'cba'), 0.25)
1694
        self.assertEqual(dist_length('ab', 'dcba'), 0.5)
1695
        self.assertEqual(dist_length('abcd', 'ba'), 0.5)
1696
1697
1698
class PrefixTestCases(unittest.TestCase):
    """Test prefix similarity functions.

    abydos.distance.sim_prefix & .dist_prefix
    """

    def test_sim_prefix(self):
        """Test abydos.distance.sim_prefix."""
        exact = self.assertEqual
        approx = self.assertAlmostEqual
        cases = (
            (exact, '', '', 1),
            (exact, 'a', '', 0),
            (exact, '', 'a', 0),
            (exact, 'a', 'a', 1),
            # strings that may share leading characters
            (exact, 'ax', 'a', 1),
            (exact, 'axx', 'a', 1),
            (exact, 'ax', 'ay', 1/2),
            (exact, 'a', 'ay', 1),
            (exact, 'a', 'ayy', 1),
            (exact, 'ax', 'ay', 1/2),
            (exact, 'a', 'y', 0),
            (exact, 'y', 'a', 0),
            (exact, 'aaax', 'aaa', 1),
            (approx, 'axxx', 'aaa', 1/3),
            (exact, 'aaxx', 'aayy', 1/2),
            (exact, 'xxaa', 'yyaa', 0),
            (approx, 'aaxxx', 'aay', 2/3),
            (exact, 'aaxxxx', 'aayyy', 2/5),
            # strings that only share trailing characters
            (exact, 'xa', 'a', 0),
            (exact, 'xxa', 'a', 0),
            (exact, 'xa', 'ya', 0),
            (exact, 'a', 'ya', 0),
            (exact, 'a', 'yya', 0),
            (exact, 'xa', 'ya', 0),
            (exact, 'xaaa', 'aaa', 0),
            (exact, 'xxxa', 'aaa', 0),
            (exact, 'xxxaa', 'yaa', 0),
            (exact, 'xxxxaa', 'yyyaa', 0),
        )
        for check, src, tar, expected in cases:
            check(sim_prefix(src, tar), expected)

    def test_dist_prefix(self):
        """Test abydos.distance.dist_prefix."""
        exact = self.assertEqual
        approx = self.assertAlmostEqual
        cases = (
            (exact, '', '', 0),
            (exact, 'a', '', 1),
            (exact, '', 'a', 1),
            (exact, 'a', 'a', 0),
            # strings that may share leading characters
            (exact, 'ax', 'a', 0),
            (exact, 'axx', 'a', 0),
            (exact, 'ax', 'ay', 1/2),
            (exact, 'a', 'ay', 0),
            (exact, 'a', 'ayy', 0),
            (exact, 'ax', 'ay', 1/2),
            (exact, 'a', 'y', 1),
            (exact, 'y', 'a', 1),
            (exact, 'aaax', 'aaa', 0),
            (approx, 'axxx', 'aaa', 2/3),
            (exact, 'aaxx', 'aayy', 1/2),
            (exact, 'xxaa', 'yyaa', 1),
            (approx, 'aaxxx', 'aay', 1/3),
            (exact, 'aaxxxx', 'aayyy', 3/5),
            # strings that only share trailing characters
            (exact, 'xa', 'a', 1),
            (exact, 'xxa', 'a', 1),
            (exact, 'xa', 'ya', 1),
            (exact, 'a', 'ya', 1),
            (exact, 'a', 'yya', 1),
            (exact, 'xa', 'ya', 1),
            (exact, 'xaaa', 'aaa', 1),
            (exact, 'xxxa', 'aaa', 1),
            (exact, 'xxxaa', 'yaa', 1),
            (exact, 'xxxxaa', 'yyyaa', 1),
        )
        for check, src, tar, expected in cases:
            check(dist_prefix(src, tar), expected)
1765
1766
1767
class SuffixTestCases(unittest.TestCase):
    """Test suffix similarity functions.

    abydos.distance.sim_suffix & .dist_suffix
    """

    def test_sim_suffix(self):
        """Test abydos.distance.sim_suffix."""
        exact = self.assertEqual
        approx = self.assertAlmostEqual
        cases = (
            (exact, '', '', 1),
            (exact, 'a', '', 0),
            (exact, '', 'a', 0),
            (exact, 'a', 'a', 1),
            # strings that may share leading characters
            (exact, 'ax', 'a', 0),
            (exact, 'axx', 'a', 0),
            (exact, 'ax', 'ay', 0),
            (exact, 'a', 'ay', 0),
            (exact, 'a', 'ayy', 0),
            (exact, 'ax', 'ay', 0),
            (exact, 'a', 'y', 0),
            (exact, 'y', 'a', 0),
            (exact, 'aaax', 'aaa', 0),
            (exact, 'axxx', 'aaa', 0),
            (exact, 'aaxx', 'aayy', 0),
            (exact, 'xxaa', 'yyaa', 1/2),
            (exact, 'aaxxx', 'aay', 0),
            (exact, 'aaxxxx', 'aayyy', 0),
            # strings that share trailing characters
            (exact, 'xa', 'a', 1),
            (exact, 'xxa', 'a', 1),
            (exact, 'xa', 'ya', 1/2),
            (exact, 'a', 'ya', 1),
            (exact, 'a', 'yya', 1),
            (exact, 'xa', 'ya', 1/2),
            (exact, 'xaaa', 'aaa', 1),
            (approx, 'xxxa', 'aaa', 1/3),
            (approx, 'xxxaa', 'yaa', 2/3),
            (exact, 'xxxxaa', 'yyyaa', 2/5),
        )
        for check, src, tar, expected in cases:
            check(sim_suffix(src, tar), expected)

    def test_dist_suffix(self):
        """Test abydos.distance.dist_suffix."""
        exact = self.assertEqual
        approx = self.assertAlmostEqual
        cases = (
            (exact, '', '', 0),
            (exact, 'a', '', 1),
            (exact, '', 'a', 1),
            (exact, 'a', 'a', 0),
            # strings that may share leading characters
            (exact, 'ax', 'a', 1),
            (exact, 'axx', 'a', 1),
            (exact, 'ax', 'ay', 1),
            (exact, 'a', 'ay', 1),
            (exact, 'a', 'ayy', 1),
            (exact, 'ax', 'ay', 1),
            (exact, 'a', 'y', 1),
            (exact, 'y', 'a', 1),
            (exact, 'aaax', 'aaa', 1),
            (exact, 'axxx', 'aaa', 1),
            (exact, 'aaxx', 'aayy', 1),
            (exact, 'xxaa', 'yyaa', 1/2),
            (exact, 'aaxxx', 'aay', 1),
            (exact, 'aaxxxx', 'aayyy', 1),
            # strings that share trailing characters
            (exact, 'xa', 'a', 0),
            (exact, 'xxa', 'a', 0),
            (exact, 'xa', 'ya', 1/2),
            (exact, 'a', 'ya', 0),
            (exact, 'a', 'yya', 0),
            (exact, 'xa', 'ya', 1/2),
            (exact, 'xaaa', 'aaa', 0),
            (approx, 'xxxa', 'aaa', 2/3),
            (approx, 'xxxaa', 'yaa', 1/3),
            (exact, 'xxxxaa', 'yyyaa', 3/5),
        )
        for check, src, tar, expected in cases:
            check(dist_suffix(src, tar), expected)
1834
1835
1836
class MLIPNSTestCases(unittest.TestCase):
    """Test MLIPNS functions.

    abydos.distance.sim_mlipns & .dist_mlipns
    """

    def test_sim_mlipns(self):
        """Test abydos.distance.sim_mlipns."""
        # (src, tar, expected similarity)
        expectations = (
            ('', '', 1),
            ('a', '', 0),
            ('', 'a', 0),
            ('a', 'a', 1),
            ('ab', 'a', 1),
            ('abc', 'abc', 1),
            ('abc', 'abcde', 1),
            ('abcg', 'abcdeg', 1),
            ('abcg', 'abcdefg', 0),
            ('Tomato', 'Tamato', 1),
            ('ato', 'Tam', 1),
        )
        for src, tar, expected in expectations:
            self.assertEqual(sim_mlipns(src, tar), expected)

    def test_dist_mlipns(self):
        """Test abydos.distance.dist_mlipns."""
        # (src, tar, expected distance)
        expectations = (
            ('', '', 0),
            ('a', '', 1),
            ('', 'a', 1),
            ('a', 'a', 0),
            ('ab', 'a', 0),
            ('abc', 'abc', 0),
            ('abc', 'abcde', 0),
            ('abcg', 'abcdeg', 0),
            ('abcg', 'abcdefg', 1),
            ('Tomato', 'Tamato', 0),
            ('ato', 'Tam', 0),
        )
        for src, tar, expected in expectations:
            self.assertEqual(dist_mlipns(src, tar), expected)
1869
1870
1871
class BagTestCases(unittest.TestCase):
    """Test bag similarity functions.

    abydos.distance.bag, .sim_bag & .dist_bag
    """

    def test_bag(self):
        """Test abydos.distance.bag."""
        # (src, tar, expected bag distance)
        expectations = (
            ('', '', 0),
            ('nelson', '', 6),
            ('', 'neilsen', 7),
            ('ab', 'a', 1),
            ('ab', 'c', 2),
            ('nelson', 'neilsen', 2),
            ('neilsen', 'nelson', 2),
            ('niall', 'neal', 2),
            ('aluminum', 'Catalan', 5),
            ('abcdefg', 'hijklm', 7),
            ('abcdefg', 'hijklmno', 8),
        )
        for src, tar, expected in expectations:
            self.assertEqual(bag(src, tar), expected)

    def test_sim_bag(self):
        """Test abydos.distance.sim_bag."""
        # leading cases: exact comparison
        for src, tar, expected in (
                ('', '', 1),
                ('nelson', '', 0),
                ('', 'neilsen', 0),
                ('ab', 'a', 0.5),
                ('ab', 'c', 0),
        ):
            self.assertEqual(sim_bag(src, tar), expected)

        # fractional results: approximate comparison
        for src, tar, expected in (
                ('nelson', 'neilsen', 5/7),
                ('neilsen', 'nelson', 5/7),
                ('niall', 'neal', 3/5),
                ('aluminum', 'Catalan', 3/8),
        ):
            self.assertAlmostEqual(sim_bag(src, tar), expected)

        # trailing cases: exact comparison
        for src, tar, expected in (
                ('abcdefg', 'hijklm', 0),
                ('abcdefg', 'hijklmno', 0),
        ):
            self.assertEqual(sim_bag(src, tar), expected)

    def test_dist_bag(self):
        """Test abydos.distance.dist_bag."""
        # leading cases: exact comparison
        for src, tar, expected in (
                ('', '', 0),
                ('nelson', '', 1),
                ('', 'neilsen', 1),
                ('ab', 'a', 0.5),
                ('ab', 'c', 1),
        ):
            self.assertEqual(dist_bag(src, tar), expected)

        # fractional results: approximate comparison
        for src, tar, expected in (
                ('nelson', 'neilsen', 2/7),
                ('neilsen', 'nelson', 2/7),
                ('niall', 'neal', 2/5),
                ('aluminum', 'Catalan', 5/8),
        ):
            self.assertAlmostEqual(dist_bag(src, tar), expected)

        # trailing cases: exact comparison
        for src, tar, expected in (
                ('abcdefg', 'hijklm', 1),
                ('abcdefg', 'hijklmno', 1),
        ):
            self.assertEqual(dist_bag(src, tar), expected)
1918
1919
1920
class EditexTestCases(unittest.TestCase):
    """Test Editex functions.

    abydos.distance.editex, .sim_editex & .dist_editex
    """

    def test_editex(self):
        """Test abydos.distance.editex."""
        # (src, tar, expected Editex distance)
        expectations = (
            ('', '', 0),
            ('nelson', '', 12),
            ('', 'neilsen', 14),
            ('ab', 'a', 2),
            ('ab', 'c', 4),
            ('nelson', 'neilsen', 2),
            ('neilsen', 'nelson', 2),
            ('niall', 'neal', 1),
            ('neal', 'niall', 1),
            ('niall', 'nihal', 2),
            ('nihal', 'niall', 2),
            ('neal', 'nihl', 3),
            ('nihl', 'neal', 3),
        )
        for src, tar, expected in expectations:
            self.assertEqual(editex(src, tar), expected)

    def test_editex_local(self):
        """Test abydos.distance.editex (local variant)."""
        # (src, tar, expected Editex distance with local=True)
        expectations = (
            ('', '', 0),
            ('nelson', '', 12),
            ('', 'neilsen', 14),
            ('ab', 'a', 2),
            ('ab', 'c', 2),
            ('nelson', 'neilsen', 2),
            ('neilsen', 'nelson', 2),
            ('niall', 'neal', 1),
            ('neal', 'niall', 1),
            ('niall', 'nihal', 2),
            ('nihal', 'niall', 2),
            ('neal', 'nihl', 3),
            ('nihl', 'neal', 3),
        )
        for src, tar, expected in expectations:
            self.assertEqual(editex(src, tar, local=True), expected)

    def test_sim_editex(self):
        """Test abydos.distance.sim_editex."""
        # leading cases: exact comparison
        for src, tar, expected in (
                ('', '', 1),
                ('nelson', '', 0),
                ('', 'neilsen', 0),
                ('ab', 'a', 0.5),
                ('ab', 'c', 0),
        ):
            self.assertEqual(sim_editex(src, tar), expected)

        # fractional results: approximate comparison
        for src, tar, expected in (
                ('nelson', 'neilsen', 12/14),
                ('neilsen', 'nelson', 12/14),
        ):
            self.assertAlmostEqual(sim_editex(src, tar), expected)

        self.assertEqual(sim_editex('niall', 'neal'), 0.9)

    def test_dist_editex(self):
        """Test abydos.distance.dist_editex."""
        # leading cases: exact comparison
        for src, tar, expected in (
                ('', '', 0),
                ('nelson', '', 1),
                ('', 'neilsen', 1),
                ('ab', 'a', 0.5),
                ('ab', 'c', 1),
        ):
            self.assertEqual(dist_editex(src, tar), expected)

        # fractional results: approximate comparison
        for src, tar, expected in (
                ('nelson', 'neilsen', 2/14),
                ('neilsen', 'nelson', 2/14),
        ):
            self.assertAlmostEqual(dist_editex(src, tar), expected)

        self.assertEqual(dist_editex('niall', 'neal'), 0.1)
1979
1980
1981
class BaystatTestCases(unittest.TestCase):
    """Test Baystat functions.

    abydos.distance.sim_baystat & .dist_baystat
    """

    def test_sim_baystat(self):
        """Test abydos.distance.sim_baystat."""
        # Base cases
        self.assertEqual(sim_baystat('', ''), 1)
        self.assertEqual(sim_baystat('Colin', ''), 0)
        self.assertEqual(sim_baystat('Colin', 'Colin'), 1)

        # Examples given in the paper
        # https://www.statistik.bayern.de/medien/statistik/zensus/zusammenf__hrung_von_datenbest__nden_ohne_numerische_identifikatoren.pdf
        self.assertAlmostEqual(sim_baystat('DRAKOMENA', 'DRAOMINA'), 7/9)
        self.assertAlmostEqual(sim_baystat('RIEKI', 'RILKI'), 4/5)
        self.assertAlmostEqual(sim_baystat('ATANASSIONI', 'ATANASIOU'), 8/11)
        self.assertAlmostEqual(sim_baystat('LIESKOVSKY', 'LIESZKOVSZKY'),
                               10/12)
        self.assertAlmostEqual(sim_baystat('JEANETTE', 'JEANNETTE'), 8/9)
        self.assertAlmostEqual(sim_baystat('JOHANNES', 'JOHAN'), 0.625)
        self.assertAlmostEqual(sim_baystat('JOHANNES', 'HANS'), 0.375)
        self.assertAlmostEqual(sim_baystat('JOHANNES', 'HANNES'), 0.75)
        self.assertAlmostEqual(sim_baystat('ZIMMERMANN', 'SEMMERMANN'), 0.8)
        self.assertAlmostEqual(sim_baystat('ZIMMERMANN', 'ZIMMERER'), 0.6)
        self.assertAlmostEqual(sim_baystat('ZIMMERMANN', 'ZIMMER'), 0.6)

    def test_dist_baystat(self):
        """Test abydos.distance.dist_baystat."""
        # Base cases
        self.assertEqual(dist_baystat('', ''), 0)
        self.assertEqual(dist_baystat('Colin', ''), 1)
        self.assertEqual(dist_baystat('Colin', 'Colin'), 0)

        # Examples given in the paper
        # https://www.statistik.bayern.de/medien/statistik/zensus/zusammenf__hrung_von_datenbest__nden_ohne_numerische_identifikatoren.pdf
        self.assertAlmostEqual(dist_baystat('DRAKOMENA', 'DRAOMINA'), 2/9)
        self.assertAlmostEqual(dist_baystat('RIEKI', 'RILKI'), 1/5)
        self.assertAlmostEqual(dist_baystat('ATANASSIONI', 'ATANASIOU'), 3/11)
        self.assertAlmostEqual(dist_baystat('LIESKOVSKY', 'LIESZKOVSZKY'),
                               2/12)
        self.assertAlmostEqual(dist_baystat('JEANETTE', 'JEANNETTE'), 1/9)
        self.assertAlmostEqual(dist_baystat('JOHANNES', 'JOHAN'), 0.375)
        self.assertAlmostEqual(dist_baystat('JOHANNES', 'HANS'), 0.625)
        self.assertAlmostEqual(dist_baystat('JOHANNES', 'HANNES'), 0.25)
        self.assertAlmostEqual(dist_baystat('ZIMMERMANN', 'SEMMERMANN'), 0.2)
        self.assertAlmostEqual(dist_baystat('ZIMMERMANN', 'ZIMMERER'), 0.4)
        self.assertAlmostEqual(dist_baystat('ZIMMERMANN', 'ZIMMER'), 0.4)
2030
2031
2032
class SimTFIDFTestCases(unittest.TestCase):
    """Test TF-IDF similarity functions.

    abydos.distance.sim_tfidf
    """

    def test_sim(self):
        """Test abydos.distance.sim_tfidf."""
        # Placeholder: the body makes no assertions, so this test always
        # passes without exercising any code.
2041
2042
2043
class SimDistTestCases(unittest.TestCase):
    """Test generic sim & dist functions.

    abydos.distance.sim & .dist
    """

    def test_sim(self):
        """Test abydos.distance.sim."""
        # called with only two strings, sim must agree with sim_levenshtein
        generic = sim('Niall', 'Nigel')
        reference = sim_levenshtein('Niall', 'Nigel')
        self.assertEqual(generic, reference)

        # passing 0 as the third positional argument must raise
        with self.assertRaises(AttributeError):
            sim('abc', 'abc', 0)

    def test_dist(self):
        """Test abydos.distance.dist."""
        # called with only two strings, dist must agree with dist_levenshtein
        generic = dist('Niall', 'Nigel')
        reference = dist_levenshtein('Niall', 'Nigel')
        self.assertEqual(generic, reference)

        # passing 0 as the third positional argument must raise
        with self.assertRaises(AttributeError):
            dist('abc', 'abc', 0)
2060
2061
2062
# Run the unittest command-line runner when this file is executed directly.
if __name__ == '__main__':
    unittest.main()
2064