Completed
Branch master (87ccc1)
by Chris
08:42
created

tests.distance.test_distance_token   A

Complexity

Total Complexity 17

Size/Duplication

Total Lines 548
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 358
dl 0
loc 548
rs 10
c 0
b 0
f 0
wmc 17
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2014-2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19
"""abydos.tests.test_distance.token.
20
21
This module contains unit tests for abydos.distance.token
22
"""
23
24
from __future__ import division, unicode_literals
25
26
import math
27
import unittest
28
29
from abydos.distance.token import bag, dist_bag, dist_cosine, dist_dice, \
30
    dist_jaccard, dist_monge_elkan, dist_overlap, dist_tversky, sim_bag, \
31
    sim_cosine, sim_dice, sim_jaccard, sim_monge_elkan, sim_overlap, \
32
    sim_tanimoto, sim_tversky, tanimoto
33
from abydos.tokenizer.qgram import QGrams
34
35
from .. import NONQ_FROM, NONQ_TO
36
37
38
class TverskyIndexTestCases(unittest.TestCase):
    """Test Tversky functions.

    abydos.distance.sim_tversky & .dist_tversky
    """

    def test_sim_tversky(self):
        """Test abydos.distance.sim_tversky."""
        # third positional argument omitted first, then passed as 2; both
        # runs are held to the same expected values
        for q_args in ((), (2,)):
            self.assertEqual(sim_tversky('', '', *q_args), 1)
            self.assertEqual(sim_tversky('nelson', '', *q_args), 0)
            self.assertEqual(sim_tversky('', 'neilsen', *q_args), 0)
            self.assertAlmostEqual(
                sim_tversky('nelson', 'neilsen', *q_args), 4/11)

        # test valid alpha & beta
        for alpha, beta in ((-1, -1), (-1, 0), (0, -1)):
            self.assertRaises(ValueError, sim_tversky, 'abcd', 'dcba', 2,
                              alpha, beta)

        # test empty QGrams
        self.assertAlmostEqual(sim_tversky('nelson', 'neilsen', 7), 0.0)

        # test unequal alpha & beta
        unequal = ((2, 1, 3/11), (1, 2, 3/10), (2, 2, 3/13))
        for alpha, beta, expected in unequal:
            self.assertAlmostEqual(
                sim_tversky('niall', 'neal', 2, alpha, beta), expected)

        # test bias parameter
        biased = ((1, 1, 7/11), (2, 1, 7/9), (1, 2, 7/15), (2, 2, 7/11))
        for alpha, beta, expected in biased:
            self.assertAlmostEqual(
                sim_tversky('niall', 'neal', 2, alpha, beta, 0.5), expected)

        # supplied q-gram tests
        for src, tar, expected in (('', '', 1), ('nelson', '', 0),
                                   ('', 'neilsen', 0)):
            self.assertEqual(sim_tversky(QGrams(src), QGrams(tar)), expected)
        self.assertAlmostEqual(
            sim_tversky(QGrams('nelson'), QGrams('neilsen')), 4/11)

        # non-q-gram tests
        for src, tar, expected in (('', '', 1), ('the quick', '', 0),
                                   ('', 'the quick', 0)):
            self.assertEqual(sim_tversky(src, tar, 0), expected)
        for src, tar in ((NONQ_FROM, NONQ_TO), (NONQ_TO, NONQ_FROM)):
            self.assertAlmostEqual(sim_tversky(src, tar, 0), 1/3)

    def test_dist_tversky(self):
        """Test abydos.distance.dist_tversky."""
        # third positional argument omitted first, then passed as 2; both
        # runs are held to the same expected values
        for q_args in ((), (2,)):
            self.assertEqual(dist_tversky('', '', *q_args), 0)
            self.assertEqual(dist_tversky('nelson', '', *q_args), 1)
            self.assertEqual(dist_tversky('', 'neilsen', *q_args), 1)
            self.assertAlmostEqual(
                dist_tversky('nelson', 'neilsen', *q_args), 7/11)

        # test valid alpha & beta
        for alpha, beta in ((-1, -1), (-1, 0), (0, -1)):
            self.assertRaises(ValueError, dist_tversky, 'abcd', 'dcba', 2,
                              alpha, beta)

        # test empty QGrams
        self.assertAlmostEqual(dist_tversky('nelson', 'neilsen', 7), 1.0)

        # test unequal alpha & beta
        unequal = ((2, 1, 8/11), (1, 2, 7/10), (2, 2, 10/13))
        for alpha, beta, expected in unequal:
            self.assertAlmostEqual(
                dist_tversky('niall', 'neal', 2, alpha, beta), expected)

        # test bias parameter
        biased = ((1, 1, 4/11), (2, 1, 2/9), (1, 2, 8/15), (2, 2, 4/11))
        for alpha, beta, expected in biased:
            self.assertAlmostEqual(
                dist_tversky('niall', 'neal', 2, alpha, beta, 0.5), expected)

        # supplied q-gram tests
        for src, tar, expected in (('', '', 0), ('nelson', '', 1),
                                   ('', 'neilsen', 1)):
            self.assertEqual(dist_tversky(QGrams(src), QGrams(tar)), expected)
        self.assertAlmostEqual(
            dist_tversky(QGrams('nelson'), QGrams('neilsen')), 7/11)

        # non-q-gram tests
        for src, tar, expected in (('', '', 0), ('the quick', '', 1),
                                   ('', 'the quick', 1)):
            self.assertEqual(dist_tversky(src, tar, 0), expected)
        for src, tar in ((NONQ_FROM, NONQ_TO), (NONQ_TO, NONQ_FROM)):
            self.assertAlmostEqual(dist_tversky(src, tar, 0), 2/3)
140
141
142
class DiceTestCases(unittest.TestCase):
    """Test Dice functions.

    abydos.distance.sim_dice & .dist_dice
    """

    def test_sim_dice(self):
        """Test abydos.distance.sim_dice."""
        # third positional argument omitted first, then passed as 2
        for q_args in ((), (2,)):
            self.assertEqual(sim_dice('', '', *q_args), 1)
            self.assertEqual(sim_dice('nelson', '', *q_args), 0)
            self.assertEqual(sim_dice('', 'neilsen', *q_args), 0)
            self.assertAlmostEqual(
                sim_dice('nelson', 'neilsen', *q_args), 8/15)

        # supplied q-gram tests
        for src, tar, expected in (('', '', 1), ('nelson', '', 0),
                                   ('', 'neilsen', 0)):
            self.assertEqual(sim_dice(QGrams(src), QGrams(tar)), expected)
        self.assertAlmostEqual(
            sim_dice(QGrams('nelson'), QGrams('neilsen')), 8/15)

        # non-q-gram tests
        for src, tar, expected in (('', '', 1), ('the quick', '', 0),
                                   ('', 'the quick', 0)):
            self.assertEqual(sim_dice(src, tar, 0), expected)
        for src, tar in ((NONQ_FROM, NONQ_TO), (NONQ_TO, NONQ_FROM)):
            self.assertAlmostEqual(sim_dice(src, tar, 0), 1/2)

    def test_dist_dice(self):
        """Test abydos.distance.dist_dice."""
        # third positional argument omitted first, then passed as 2
        for q_args in ((), (2,)):
            self.assertEqual(dist_dice('', '', *q_args), 0)
            self.assertEqual(dist_dice('nelson', '', *q_args), 1)
            self.assertEqual(dist_dice('', 'neilsen', *q_args), 1)
            self.assertAlmostEqual(
                dist_dice('nelson', 'neilsen', *q_args), 7/15)

        # supplied q-gram tests
        for src, tar, expected in (('', '', 0), ('nelson', '', 1),
                                   ('', 'neilsen', 1)):
            self.assertEqual(dist_dice(QGrams(src), QGrams(tar)), expected)
        self.assertAlmostEqual(
            dist_dice(QGrams('nelson'), QGrams('neilsen')), 7/15)

        # non-q-gram tests
        for src, tar, expected in (('', '', 0), ('the quick', '', 1),
                                   ('', 'the quick', 1)):
            self.assertEqual(dist_dice(src, tar, 0), expected)
        for src, tar in ((NONQ_FROM, NONQ_TO), (NONQ_TO, NONQ_FROM)):
            self.assertAlmostEqual(dist_dice(src, tar, 0), 1/2)
199
200
201
class JaccardTestCases(unittest.TestCase):
    """Test Jaccard functions.

    abydos.distance.sim_jaccard & .dist_jaccard
    """

    def test_sim_jaccard(self):
        """Test abydos.distance.sim_jaccard."""
        # third positional argument omitted first, then passed as 2
        for q_args in ((), (2,)):
            self.assertEqual(sim_jaccard('', '', *q_args), 1)
            self.assertEqual(sim_jaccard('nelson', '', *q_args), 0)
            self.assertEqual(sim_jaccard('', 'neilsen', *q_args), 0)
            self.assertAlmostEqual(
                sim_jaccard('nelson', 'neilsen', *q_args), 4/11)

        # supplied q-gram tests
        for src, tar, expected in (('', '', 1), ('nelson', '', 0),
                                   ('', 'neilsen', 0)):
            self.assertEqual(sim_jaccard(QGrams(src), QGrams(tar)), expected)
        self.assertAlmostEqual(
            sim_jaccard(QGrams('nelson'), QGrams('neilsen')), 4/11)

        # non-q-gram tests
        for src, tar, expected in (('', '', 1), ('the quick', '', 0),
                                   ('', 'the quick', 0)):
            self.assertEqual(sim_jaccard(src, tar, 0), expected)
        for src, tar in ((NONQ_FROM, NONQ_TO), (NONQ_TO, NONQ_FROM)):
            self.assertAlmostEqual(sim_jaccard(src, tar, 0), 1/3)

    def test_dist_jaccard(self):
        """Test abydos.distance.dist_jaccard."""
        # third positional argument omitted first, then passed as 2
        for q_args in ((), (2,)):
            self.assertEqual(dist_jaccard('', '', *q_args), 0)
            self.assertEqual(dist_jaccard('nelson', '', *q_args), 1)
            self.assertEqual(dist_jaccard('', 'neilsen', *q_args), 1)
            self.assertAlmostEqual(
                dist_jaccard('nelson', 'neilsen', *q_args), 7/11)

        # supplied q-gram tests
        for src, tar, expected in (('', '', 0), ('nelson', '', 1),
                                   ('', 'neilsen', 1)):
            self.assertEqual(dist_jaccard(QGrams(src), QGrams(tar)), expected)
        self.assertAlmostEqual(
            dist_jaccard(QGrams('nelson'), QGrams('neilsen')), 7/11)

        # non-q-gram tests
        for src, tar, expected in (('', '', 0), ('the quick', '', 1),
                                   ('', 'the quick', 1)):
            self.assertEqual(dist_jaccard(src, tar, 0), expected)
        for src, tar in ((NONQ_FROM, NONQ_TO), (NONQ_TO, NONQ_FROM)):
            self.assertAlmostEqual(dist_jaccard(src, tar, 0), 2/3)
258
259
260
class OverlapTestCases(unittest.TestCase):
    """Test overlap functions.

    abydos.distance.sim_overlap & .dist_overlap
    """

    def test_sim_overlap(self):
        """Test abydos.distance.sim_overlap."""
        # third positional argument omitted first, then passed as 2
        for q_args in ((), (2,)):
            self.assertEqual(sim_overlap('', '', *q_args), 1)
            self.assertEqual(sim_overlap('nelson', '', *q_args), 0)
            self.assertEqual(sim_overlap('', 'neilsen', *q_args), 0)
            self.assertAlmostEqual(
                sim_overlap('nelson', 'neilsen', *q_args), 4/7)

        # supplied q-gram tests
        for src, tar, expected in (('', '', 1), ('nelson', '', 0),
                                   ('', 'neilsen', 0)):
            self.assertEqual(sim_overlap(QGrams(src), QGrams(tar)), expected)
        self.assertAlmostEqual(
            sim_overlap(QGrams('nelson'), QGrams('neilsen')), 4/7)

        # non-q-gram tests
        for src, tar, expected in (('', '', 1), ('the quick', '', 0),
                                   ('', 'the quick', 0)):
            self.assertEqual(sim_overlap(src, tar, 0), expected)
        for src, tar in ((NONQ_FROM, NONQ_TO), (NONQ_TO, NONQ_FROM)):
            self.assertAlmostEqual(sim_overlap(src, tar, 0), 4/7)

    def test_dist_overlap(self):
        """Test abydos.distance.dist_overlap."""
        # third positional argument omitted first, then passed as 2
        for q_args in ((), (2,)):
            self.assertEqual(dist_overlap('', '', *q_args), 0)
            self.assertEqual(dist_overlap('nelson', '', *q_args), 1)
            self.assertEqual(dist_overlap('', 'neilsen', *q_args), 1)
            self.assertAlmostEqual(
                dist_overlap('nelson', 'neilsen', *q_args), 3/7)

        # supplied q-gram tests
        for src, tar, expected in (('', '', 0), ('nelson', '', 1),
                                   ('', 'neilsen', 1)):
            self.assertEqual(dist_overlap(QGrams(src), QGrams(tar)), expected)
        self.assertAlmostEqual(
            dist_overlap(QGrams('nelson'), QGrams('neilsen')), 3/7)

        # non-q-gram tests
        for src, tar, expected in (('', '', 0), ('the quick', '', 1),
                                   ('', 'the quick', 1)):
            self.assertEqual(dist_overlap(src, tar, 0), expected)
        for src, tar in ((NONQ_FROM, NONQ_TO), (NONQ_TO, NONQ_FROM)):
            self.assertAlmostEqual(dist_overlap(src, tar, 0), 3/7)
317
318
319
class TanimotoTestCases(unittest.TestCase):
    """Test Tanimoto functions.

    abydos.distance.sim_tanimoto & .tanimoto
    """

    def test_tanimoto_coeff(self):
        """Test abydos.distance.sim_tanimoto."""
        # third positional argument omitted first, then passed as 2
        for q_args in ((), (2,)):
            self.assertEqual(sim_tanimoto('', '', *q_args), 1)
            self.assertEqual(sim_tanimoto('nelson', '', *q_args), 0)
            self.assertEqual(sim_tanimoto('', 'neilsen', *q_args), 0)
            self.assertAlmostEqual(
                sim_tanimoto('nelson', 'neilsen', *q_args), 4/11)

        # supplied q-gram tests
        for src, tar, expected in (('', '', 1), ('nelson', '', 0),
                                   ('', 'neilsen', 0)):
            self.assertEqual(sim_tanimoto(QGrams(src), QGrams(tar)), expected)
        self.assertAlmostEqual(
            sim_tanimoto(QGrams('nelson'), QGrams('neilsen')), 4/11)

        # non-q-gram tests
        for src, tar, expected in (('', '', 1), ('the quick', '', 0),
                                   ('', 'the quick', 0)):
            self.assertEqual(sim_tanimoto(src, tar, 0), expected)
        for src, tar in ((NONQ_FROM, NONQ_TO), (NONQ_TO, NONQ_FROM)):
            self.assertAlmostEqual(sim_tanimoto(src, tar, 0), 1/3)

    def test_tanimoto(self):
        """Test abydos.distance.tanimoto."""
        neg_inf = float('-inf')

        # third positional argument omitted first, then passed as 2
        for q_args in ((), (2,)):
            self.assertEqual(tanimoto('', '', *q_args), 0)
            self.assertEqual(tanimoto('nelson', '', *q_args), neg_inf)
            self.assertEqual(tanimoto('', 'neilsen', *q_args), neg_inf)
            self.assertAlmostEqual(
                tanimoto('nelson', 'neilsen', *q_args), math.log(4/11, 2))

        # supplied q-gram tests
        for src, tar, expected in (('', '', 0), ('nelson', '', neg_inf),
                                   ('', 'neilsen', neg_inf)):
            self.assertEqual(tanimoto(QGrams(src), QGrams(tar)), expected)
        self.assertAlmostEqual(
            tanimoto(QGrams('nelson'), QGrams('neilsen')), math.log(4/11, 2))

        # non-q-gram tests
        for src, tar, expected in (('', '', 0), ('the quick', '', neg_inf),
                                   ('', 'the quick', neg_inf)):
            self.assertEqual(tanimoto(src, tar, 0), expected)
        for src, tar in ((NONQ_FROM, NONQ_TO), (NONQ_TO, NONQ_FROM)):
            self.assertAlmostEqual(tanimoto(src, tar, 0), math.log(1/3, 2))
381
382
383
class CosineSimilarityTestCases(unittest.TestCase):
    """Test cosine similarity functions.

    abydos.distance.sim_cosine & .dist_cosine
    """

    def test_sim_cosine(self):
        """Test abydos.distance.sim_cosine."""
        # third positional argument omitted first, then passed as 2
        for q_args in ((), (2,)):
            self.assertEqual(sim_cosine('', '', *q_args), 1)
            self.assertEqual(sim_cosine('nelson', '', *q_args), 0)
            self.assertEqual(sim_cosine('', 'neilsen', *q_args), 0)
            self.assertAlmostEqual(
                sim_cosine('nelson', 'neilsen', *q_args), 4/math.sqrt(7*8))

        # supplied q-gram tests
        for src, tar, expected in (('', '', 1), ('nelson', '', 0),
                                   ('', 'neilsen', 0)):
            self.assertEqual(sim_cosine(QGrams(src), QGrams(tar)), expected)
        self.assertAlmostEqual(
            sim_cosine(QGrams('nelson'), QGrams('neilsen')), 4/math.sqrt(7*8))

        # non-q-gram tests
        for src, tar, expected in (('', '', 1), ('the quick', '', 0),
                                   ('', 'the quick', 0)):
            self.assertEqual(sim_cosine(src, tar, 0), expected)
        for src, tar in ((NONQ_FROM, NONQ_TO), (NONQ_TO, NONQ_FROM)):
            self.assertAlmostEqual(
                sim_cosine(src, tar, 0), 4/math.sqrt(9*7))

    def test_dist_cosine(self):
        """Test abydos.distance.dist_cosine."""
        # third positional argument omitted first, then passed as 2
        for q_args in ((), (2,)):
            self.assertEqual(dist_cosine('', '', *q_args), 0)
            self.assertEqual(dist_cosine('nelson', '', *q_args), 1)
            self.assertEqual(dist_cosine('', 'neilsen', *q_args), 1)
            self.assertAlmostEqual(
                dist_cosine('nelson', 'neilsen', *q_args),
                1-(4/math.sqrt(7*8)))

        # supplied q-gram tests
        for src, tar, expected in (('', '', 0), ('nelson', '', 1),
                                   ('', 'neilsen', 1)):
            self.assertEqual(dist_cosine(QGrams(src), QGrams(tar)), expected)
        self.assertAlmostEqual(
            dist_cosine(QGrams('nelson'), QGrams('neilsen')),
            1-(4/math.sqrt(7*8)))

        # non-q-gram tests
        for src, tar, expected in (('', '', 0), ('the quick', '', 1),
                                   ('', 'the quick', 1)):
            self.assertEqual(dist_cosine(src, tar, 0), expected)
        for src, tar in ((NONQ_FROM, NONQ_TO), (NONQ_TO, NONQ_FROM)):
            self.assertAlmostEqual(
                dist_cosine(src, tar, 0), 1-4/math.sqrt(9*7))
449
450
451
class MongeElkanTestCases(unittest.TestCase):
    """Test Monge-Elkan functions.

    abydos.distance.sim_monge_elkan & .dist_monge_elkan

    Expectations written as quotients (3/4, 5/6, ...) are checked with
    assertAlmostEqual: exact equality on floating-point quotients is
    brittle, and part of this class already relied on the approximate
    comparison. Only the 0 and 1 end points are compared exactly.
    """

    def test_sim_monge_elkan(self):
        """Test abydos.distance.sim_monge_elkan."""
        self.assertEqual(sim_monge_elkan('', ''), 1)
        self.assertEqual(sim_monge_elkan('', 'a'), 0)
        self.assertEqual(sim_monge_elkan('a', 'a'), 1)

        # default call (no symmetric keyword)
        for tar, expected in (('Neal', 3/4), ('Njall', 5/6),
                              ('Niel', 3/4), ('Nigel', 3/4)):
            self.assertAlmostEqual(sim_monge_elkan('Niall', tar), expected)

        # symmetric=True variant
        for tar, expected in (('Neal', 31/40), ('Njall', 5/6),
                              ('Niel', 31/40), ('Nigel', 17/24)):
            self.assertAlmostEqual(
                sim_monge_elkan('Niall', tar, symmetric=True), expected)

    def test_dist_monge_elkan(self):
        """Test abydos.distance.dist_monge_elkan."""
        self.assertEqual(dist_monge_elkan('', ''), 0)
        self.assertEqual(dist_monge_elkan('', 'a'), 1)

        # default call (no symmetric keyword)
        for tar, expected in (('Neal', 1/4), ('Njall', 1/6),
                              ('Niel', 1/4), ('Nigel', 1/4)):
            self.assertAlmostEqual(dist_monge_elkan('Niall', tar), expected)

        # symmetric=True variant
        for tar, expected in (('Neal', 9/40), ('Njall', 1/6),
                              ('Niel', 9/40), ('Nigel', 7/24)):
            self.assertAlmostEqual(
                dist_monge_elkan('Niall', tar, symmetric=True), expected)
495
496
497
class BagTestCases(unittest.TestCase):
    """Test bag similarity functions.

    abydos.distance.bag, .sim_bag & .dist_bag
    """

    def test_bag(self):
        """Test abydos.distance.bag."""
        cases = (
            ('', '', 0),
            ('nelson', '', 6),
            ('', 'neilsen', 7),
            ('ab', 'a', 1),
            ('ab', 'c', 2),
            ('nelson', 'neilsen', 2),
            ('neilsen', 'nelson', 2),
            ('niall', 'neal', 2),
            ('aluminum', 'Catalan', 5),
            ('abcdefg', 'hijklm', 7),
            ('abcdefg', 'hijklmno', 8),
        )
        for src, tar, expected in cases:
            self.assertEqual(bag(src, tar), expected)

    def test_sim_bag(self):
        """Test abydos.distance.sim_bag."""
        # the three groups run in the original assertion order:
        # exact, approximate, exact
        leading_exact = (
            ('', '', 1),
            ('nelson', '', 0),
            ('', 'neilsen', 0),
            ('ab', 'a', 0.5),
            ('ab', 'c', 0),
        )
        approximate = (
            ('nelson', 'neilsen', 5/7),
            ('neilsen', 'nelson', 5/7),
            ('niall', 'neal', 3/5),
            ('aluminum', 'Catalan', 3/8),
        )
        trailing_exact = (
            ('abcdefg', 'hijklm', 0),
            ('abcdefg', 'hijklmno', 0),
        )

        for src, tar, expected in leading_exact:
            self.assertEqual(sim_bag(src, tar), expected)
        for src, tar, expected in approximate:
            self.assertAlmostEqual(sim_bag(src, tar), expected)
        for src, tar, expected in trailing_exact:
            self.assertEqual(sim_bag(src, tar), expected)

    def test_dist_bag(self):
        """Test abydos.distance.dist_bag."""
        # the three groups run in the original assertion order:
        # exact, approximate, exact
        leading_exact = (
            ('', '', 0),
            ('nelson', '', 1),
            ('', 'neilsen', 1),
            ('ab', 'a', 0.5),
            ('ab', 'c', 1),
        )
        approximate = (
            ('nelson', 'neilsen', 2/7),
            ('neilsen', 'nelson', 2/7),
            ('niall', 'neal', 2/5),
            ('aluminum', 'Catalan', 5/8),
        )
        trailing_exact = (
            ('abcdefg', 'hijklm', 1),
            ('abcdefg', 'hijklmno', 1),
        )

        for src, tar, expected in leading_exact:
            self.assertEqual(dist_bag(src, tar), expected)
        for src, tar, expected in approximate:
            self.assertAlmostEqual(dist_bag(src, tar), expected)
        for src, tar, expected in trailing_exact:
            self.assertEqual(dist_bag(src, tar), expected)
544
545
546
# Run the test cases in this module when the file is executed directly.
if __name__ == '__main__':
    unittest.main()
548