Completed
Branch master (87ccc1)
by Chris
08:42
created

tests.stats.test_stats_pairwise   A

Complexity

Total Complexity 2

Size/Duplication

Total Lines 186
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 125
dl 0
loc 186
rs 10
c 0
b 0
f 0
wmc 2
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2014-2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19
"""abydos.tests.stats.test_stats_pairwise.
20
21
This module contains unit tests for abydos.stats.pairwise
22
"""
23
24
from __future__ import unicode_literals
25
26
import unittest
27
28
from abydos.distance.token import sim_tanimoto
29
from abydos.stats.mean import amean, gmean, hmean
30
from abydos.stats.pairwise import mean_pairwise_similarity, \
31
    pairwise_similarity_statistics
32
33
# Sample name strings shared by the test cases in this module.
NIALL = (
    'Niall', 'Neal', 'Neil', 'Njall', 'Njáll', 'Nigel', 'Neel', 'Nele',
    'Nigelli', 'Nel', 'Kneale', 'Uí Néill', "O'Neill", 'MacNeil',
    'MacNele', 'Niall Noígíallach',
)

# The entries of NIALL that contain no space character, in the same order.
NIALL_1WORD = tuple(name for name in NIALL if ' ' not in name)
40
41
42
class MPSTestCases(unittest.TestCase):
    """Test mean pairwise similarity functions.

    abydos.stats.pairwise.mean_pairwise_similarity
    """

    def test_mean_pairwise_similarity(self):
        """Test abydos.stats.pairwise.mean_pairwise_similarity.

        Results are checked against hard-coded float literals with
        assertAlmostEqual rather than exact equality: the expected values
        for the symmetric and non-symmetric variants already differ in
        their last digits, so bit-exact comparison is fragile.
        """
        # Hard-coded expected values. The default call and the explicit
        # mean_func=hmean call are expected to give the same result.
        self.assertAlmostEqual(mean_pairwise_similarity(NIALL),
                               0.29362587170180671)
        self.assertAlmostEqual(mean_pairwise_similarity(NIALL,
                                                        symmetric=True),
                               0.2936258717018066)
        self.assertAlmostEqual(mean_pairwise_similarity(NIALL,
                                                        mean_func=hmean),
                               0.29362587170180671)
        self.assertAlmostEqual(mean_pairwise_similarity(NIALL,
                                                        mean_func=hmean,
                                                        symmetric=True),
                               0.2936258717018066)
        self.assertAlmostEqual(mean_pairwise_similarity(NIALL,
                                                        mean_func=gmean),
                               0.33747245800668441)
        self.assertAlmostEqual(mean_pairwise_similarity(NIALL,
                                                        mean_func=gmean,
                                                        symmetric=True),
                               0.33747245800668441)
        self.assertAlmostEqual(mean_pairwise_similarity(NIALL,
                                                        mean_func=amean),
                               0.38009278711484601)
        self.assertAlmostEqual(mean_pairwise_similarity(NIALL,
                                                        mean_func=amean,
                                                        symmetric=True),
                               0.38009278711484623)

        # A collection of single words and the same words joined by spaces
        # into one string are expected to give equal results. Both sides
        # are computed over the same items in the same order, so these
        # comparisons stay exact.
        self.assertEqual(mean_pairwise_similarity(NIALL_1WORD),
                         mean_pairwise_similarity(' '.join(NIALL_1WORD)))
        self.assertEqual(mean_pairwise_similarity(NIALL_1WORD, symmetric=True),
                         mean_pairwise_similarity(' '.join(NIALL_1WORD),
                                                  symmetric=True))
        self.assertEqual(mean_pairwise_similarity(NIALL_1WORD,
                                                  mean_func=gmean),
                         mean_pairwise_similarity(' '.join(NIALL_1WORD),
                                                  mean_func=gmean))
        self.assertEqual(mean_pairwise_similarity(NIALL_1WORD,
                                                  mean_func=amean),
                         mean_pairwise_similarity(' '.join(NIALL_1WORD),
                                                  mean_func=amean))

        # Each of these inputs is expected to be rejected with ValueError.
        self.assertRaises(ValueError, mean_pairwise_similarity, ['a b c'])
        self.assertRaises(ValueError, mean_pairwise_similarity, 'abc')
        self.assertRaises(ValueError, mean_pairwise_similarity, 0)
        self.assertRaises(ValueError, mean_pairwise_similarity, NIALL,
                          mean_func='imaginary')
        self.assertRaises(ValueError, mean_pairwise_similarity, NIALL,
                          metric='imaginary')

        # The container type should not matter. Containers that keep the
        # item order are compared exactly; sorted() and set() change the
        # order, so those results are only compared approximately.
        self.assertEqual(mean_pairwise_similarity(NIALL),
                         mean_pairwise_similarity(tuple(NIALL)))
        self.assertEqual(mean_pairwise_similarity(NIALL),
                         mean_pairwise_similarity(list(NIALL)))
        self.assertAlmostEqual(mean_pairwise_similarity(NIALL),
                               mean_pairwise_similarity(sorted(NIALL)))
        self.assertAlmostEqual(mean_pairwise_similarity(NIALL),
                               mean_pairwise_similarity(set(NIALL)))
101
102
103
class PSSTestCases(unittest.TestCase):
    """Exercise the pairwise similarity statistics function.

    abydos.stats.pairwise.pairwise_similarity_statistics
    """

    def _check_stats(self, stats, expected):
        """Assert that a (max, min, mean, std) result matches expected.

        Both arguments are unpacked into exactly four values, and each
        pair of values is compared with assertAlmostEqual.
        """
        pw_max, pw_min, pw_mean, pw_std = stats
        exp_max, exp_min, exp_mean, exp_std = expected
        self.assertAlmostEqual(pw_max, exp_max)
        self.assertAlmostEqual(pw_min, exp_min)
        self.assertAlmostEqual(pw_mean, exp_mean)
        self.assertAlmostEqual(pw_std, exp_std)

    def test_pairwise_similarity_statistics(self):
        """Test abydos.stats.pairwise.pairwise_similarity_statistics."""
        # Default options
        self._check_stats(
            pairwise_similarity_statistics(NIALL, NIALL),
            (1.0, 0.11764705882352944,
             0.4188369879201684, 0.2265099631340623))

        self._check_stats(
            pairwise_similarity_statistics(NIALL, ('Kneal',)),
            (0.8333333333333334, 0.11764705882352944,
             0.30474877450980387, 0.1842666797571549))

        # Test symmetric
        self._check_stats(
            pairwise_similarity_statistics(NIALL, NIALL, symmetric=True),
            (1.0, 0.11764705882352944,
             0.4188369879201679, 0.22650996313406255))

        self._check_stats(
            pairwise_similarity_statistics(NIALL, ('Kneal',),
                                           symmetric=True),
            (0.8333333333333334, 0.11764705882352944,
             0.304748774509804, 0.18426667975715486))

        # Test with splittable strings
        self._check_stats(
            pairwise_similarity_statistics('The quick brown fox',
                                           'jumped over the lazy dog.'),
            (0.6666666666666667, 0.0,
             0.08499999999999999, 0.16132265804901677))

        self._check_stats(
            pairwise_similarity_statistics('The', 'jumped'),
            (0.16666666666666663, 0.16666666666666663,
             0.16666666666666663, 0.0))

        # Test with a set metric
        self._check_stats(
            pairwise_similarity_statistics(NIALL, NIALL,
                                           metric=sim_tanimoto),
            (1.0, 0.0, 0.23226906681010506, 0.24747101181262784))

        # Test using hmean
        self._check_stats(
            pairwise_similarity_statistics(NIALL, NIALL, mean_func=hmean),
            (1.0, 0.11764705882352944,
             0.30718771249150056, 0.25253182790044676))

        # Test exceptions
        with self.assertRaises(ValueError):
            pairwise_similarity_statistics(NIALL, NIALL, mean_func=None)
        with self.assertRaises(ValueError):
            pairwise_similarity_statistics(NIALL, NIALL, metric=None)
        with self.assertRaises(ValueError):
            pairwise_similarity_statistics(5, NIALL)
        with self.assertRaises(ValueError):
            pairwise_similarity_statistics(NIALL, 5)
182
183
184
if __name__ == '__main__':
    # Executed directly rather than imported: hand control to unittest.
    unittest.main()
186