1
|
|
|
# -*- coding: utf-8 -*- |
2
|
|
|
|
3
|
|
|
# Copyright 2014-2018 by Christopher C. Little. |
4
|
|
|
# This file is part of Abydos. |
5
|
|
|
# |
6
|
|
|
# Abydos is free software: you can redistribute it and/or modify |
7
|
|
|
# it under the terms of the GNU General Public License as published by |
8
|
|
|
# the Free Software Foundation, either version 3 of the License, or |
9
|
|
|
# (at your option) any later version. |
10
|
|
|
# |
11
|
|
|
# Abydos is distributed in the hope that it will be useful, |
12
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
13
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14
|
|
|
# GNU General Public License for more details. |
15
|
|
|
# |
16
|
|
|
# You should have received a copy of the GNU General Public License |
17
|
|
|
# along with Abydos. If not, see <http://www.gnu.org/licenses/>. |
18
|
|
|
|
19
|
|
|
"""abydos.tests.test_distance.compression. |
20
|
|
|
|
21
|
|
|
This module contains unit tests for abydos.distance.compression |
22
|
|
|
""" |
23
|
|
|
|
24
|
|
|
from __future__ import division, unicode_literals |
25
|
|
|
|
26
|
|
|
import pkgutil |
27
|
|
|
import sys |
28
|
|
|
import unittest |
29
|
|
|
|
30
|
|
|
from abydos.compression import arithmetic |
31
|
|
|
from abydos.distance.compression import dist_ncd_arith, dist_ncd_bwtrle, \ |
32
|
|
|
dist_ncd_bz2, dist_ncd_lzma, dist_ncd_rle, dist_ncd_zlib, sim_ncd_arith, \ |
33
|
|
|
sim_ncd_bwtrle, sim_ncd_bz2, sim_ncd_lzma, sim_ncd_rle, sim_ncd_zlib |
34
|
|
|
|
35
|
|
|
from .. import NIALL |
36
|
|
|
|
37
|
|
|
|
38
|
|
|
class CompressionTestCases(unittest.TestCase):
    """Test compression distance functions.

    abydos.distance.compression.dist_ncd_arith, .dist_ncd_bwtrle,
    .dist_ncd_bz2, .dist_ncd_lzma, .dist_ncd_rle, .dist_ncd_zlib,
    .sim_ncd_arith, .sim_ncd_bwtrle, .sim_ncd_bz2, .sim_ncd_lzma,
    .sim_ncd_rle, & .sim_ncd_zlib
    """

    # Value returned by arithmetic.train() for the space-joined NIALL names.
    # It is built once at class-definition time and passed as the optional
    # third argument in the *_ncd_arith tests below.
    arith_dict = arithmetic.train(' '.join(NIALL))

    def test_dist_ncd_bz2(self):
        """Test abydos.distance.compression.dist_ncd_bz2."""
        self.assertEqual(dist_ncd_bz2('', ''), 0)
        self.assertGreater(dist_ncd_bz2('a', ''), 0)
        self.assertGreater(dist_ncd_bz2('abcdefg', 'fg'), 0)

    def test_dist_ncd_zlib(self):
        """Test abydos.distance.compression.dist_ncd_zlib."""
        self.assertEqual(dist_ncd_zlib('', ''), 0)
        self.assertGreater(dist_ncd_zlib('a', ''), 0)
        self.assertGreater(dist_ncd_zlib('abcdefg', 'fg'), 0)

    def test_dist_ncd_arith(self):
        """Test abydos.distance.compression.dist_ncd_arith."""
        # Identical (empty) inputs, with and without an explicit table.
        self.assertEqual(dist_ncd_arith('', ''), 0)
        self.assertEqual(dist_ncd_arith('', '', self.arith_dict), 0)
        self.assertGreater(dist_ncd_arith('a', ''), 0)
        self.assertGreater(dist_ncd_arith('a', '', self.arith_dict), 0)
        self.assertGreater(dist_ncd_arith('abcdefg', 'fg'), 0)

        # Each pair is checked in both argument orders; the expected values
        # are the same either way.
        self.assertAlmostEqual(dist_ncd_arith('Niall', 'Neil',
                                              self.arith_dict),
                               0.608695652173913)
        self.assertAlmostEqual(dist_ncd_arith('Neil', 'Niall',
                                              self.arith_dict),
                               0.608695652173913)
        self.assertAlmostEqual(dist_ncd_arith('Niall', 'Neil'),
                               0.6875)
        self.assertAlmostEqual(dist_ncd_arith('Neil', 'Niall'),
                               0.6875)
        self.assertAlmostEqual(dist_ncd_arith('Njáll', 'Njall',
                                              self.arith_dict),
                               0.714285714285714)
        self.assertAlmostEqual(dist_ncd_arith('Njall', 'Njáll',
                                              self.arith_dict),
                               0.714285714285714)
        self.assertAlmostEqual(dist_ncd_arith('Njáll', 'Njall'), 0.75)
        self.assertAlmostEqual(dist_ncd_arith('Njall', 'Njáll'), 0.75)

    def test_dist_ncd_bwtrle(self):
        """Test abydos.distance.compression.dist_ncd_bwtrle."""
        self.assertEqual(dist_ncd_bwtrle('', ''), 0)
        self.assertGreater(dist_ncd_bwtrle('a', ''), 0)
        self.assertGreater(dist_ncd_bwtrle('abcdefg', 'fg'), 0)

        self.assertAlmostEqual(dist_ncd_bwtrle('abc', 'abc'), 0)
        self.assertAlmostEqual(dist_ncd_bwtrle('abc', 'def'), 0.75)

        self.assertAlmostEqual(dist_ncd_bwtrle('banana', 'banane'),
                               0.57142857142)
        self.assertAlmostEqual(dist_ncd_bwtrle('bananas', 'bananen'), 0.5)

    def test_dist_ncd_rle(self):
        """Test abydos.distance.compression.dist_ncd_rle."""
        self.assertEqual(dist_ncd_rle('', ''), 0)
        self.assertGreater(dist_ncd_rle('a', ''), 0)
        self.assertGreater(dist_ncd_rle('abcdefg', 'fg'), 0)

        self.assertAlmostEqual(dist_ncd_rle('abc', 'abc'), 0)
        self.assertAlmostEqual(dist_ncd_rle('abc', 'def'), 1)

        self.assertAlmostEqual(dist_ncd_rle('aaa', 'bbaaa'), 0.5)
        # True division: the module imports ``division`` from __future__.
        self.assertAlmostEqual(dist_ncd_rle('abb', 'bbba'), 1/3)

    def test_sim_ncd_bz2(self):
        """Test abydos.distance.compression.sim_ncd_bz2."""
        self.assertEqual(sim_ncd_bz2('', ''), 1)
        self.assertLess(sim_ncd_bz2('a', ''), 1)
        self.assertLess(sim_ncd_bz2('abcdefg', 'fg'), 1)

    def test_sim_ncd_zlib(self):
        """Test abydos.distance.compression.sim_ncd_zlib."""
        self.assertEqual(sim_ncd_zlib('', ''), 1)
        self.assertLess(sim_ncd_zlib('a', ''), 1)
        self.assertLess(sim_ncd_zlib('abcdefg', 'fg'), 1)

    def test_sim_ncd_arith(self):
        """Test abydos.distance.compression.sim_ncd_arith."""
        self.assertEqual(sim_ncd_arith('', ''), 1)
        self.assertEqual(sim_ncd_arith('', '', self.arith_dict), 1)
        self.assertLess(sim_ncd_arith('a', ''), 1)
        self.assertLess(sim_ncd_arith('a', '', self.arith_dict), 1)
        self.assertLess(sim_ncd_arith('abcdefg', 'fg'), 1)

        # Expected values are the complements (1 - x) of the ones asserted
        # in test_dist_ncd_arith for the same inputs.
        self.assertAlmostEqual(sim_ncd_arith('Niall', 'Neil',
                                             self.arith_dict),
                               0.3913043478260869)
        self.assertAlmostEqual(sim_ncd_arith('Neil', 'Niall',
                                             self.arith_dict),
                               0.3913043478260869)
        self.assertAlmostEqual(sim_ncd_arith('Niall', 'Neil'), 0.3125)
        self.assertAlmostEqual(sim_ncd_arith('Neil', 'Niall'), 0.3125)
        self.assertAlmostEqual(sim_ncd_arith('Njáll', 'Njall',
                                             self.arith_dict),
                               0.285714285714285)
        self.assertAlmostEqual(sim_ncd_arith('Njall', 'Njáll',
                                             self.arith_dict),
                               0.285714285714285)
        self.assertAlmostEqual(sim_ncd_arith('Njáll', 'Njall'), 0.25)
        self.assertAlmostEqual(sim_ncd_arith('Njall', 'Njáll'), 0.25)

    def test_sim_ncd_rle(self):
        """Test abydos.distance.compression.sim_ncd_rle."""
        self.assertEqual(sim_ncd_rle('', ''), 1)
        self.assertLess(sim_ncd_rle('a', ''), 1)
        self.assertLess(sim_ncd_rle('abcdefg', 'fg'), 1)

        self.assertAlmostEqual(sim_ncd_rle('abc', 'abc'), 1)
        self.assertAlmostEqual(sim_ncd_rle('abc', 'def'), 0)

        self.assertAlmostEqual(sim_ncd_rle('aaa', 'bbaaa'), 0.5)
        self.assertAlmostEqual(sim_ncd_rle('abb', 'bbba'), 2/3)

    def test_sim_ncd_bwtrle(self):
        """Test abydos.distance.compression.sim_ncd_bwtrle."""
        self.assertEqual(sim_ncd_bwtrle('', ''), 1)
        self.assertLess(sim_ncd_bwtrle('a', ''), 1)
        self.assertLess(sim_ncd_bwtrle('abcdefg', 'fg'), 1)

        self.assertAlmostEqual(sim_ncd_bwtrle('abc', 'abc'), 1)
        self.assertAlmostEqual(sim_ncd_bwtrle('abc', 'def'), 0.25)

        self.assertAlmostEqual(sim_ncd_bwtrle('banana', 'banane'),
                               0.42857142857)
        self.assertAlmostEqual(sim_ncd_bwtrle('bananas', 'bananen'), 0.5)

    def test_sim_ncd_lzma(self):
        """Test abydos.distance.compression.dist_ncd_lzma & .sim_ncd_lzma."""
        # Holds the real lzma module while it is hidden from sys.modules so
        # that it can be put back once the failure-path check has run.
        hidden_lzma = None

        # NOTE(review): pkgutil.find_loader is deprecated in recent Python 3
        # releases; it is kept because this module still targets Python 2
        # (see the __future__ import at the top of the file).
        if bool(pkgutil.find_loader('lzma')):
            self.assertEqual(sim_ncd_lzma('', ''), 1)
            self.assertLess(sim_ncd_lzma('a', ''), 1)
            self.assertLess(sim_ncd_lzma('abcdefg', 'fg'), 1)

            self.assertEqual(dist_ncd_lzma('', ''), 0)
            self.assertGreater(dist_ncd_lzma('a', ''), 0)
            self.assertGreater(dist_ncd_lzma('abcdefg', 'fg'), 0)

            # Hide lzma so the call below takes the "lzma unavailable" path.
            # Presumably the library decides availability by looking in
            # sys.modules -- confirm against abydos.distance.compression.
            hidden_lzma = sys.modules.pop('lzma', None)

        try:
            self.assertRaises(ValueError, sim_ncd_lzma, 'a', '')
        finally:
            # Restore the module so that removing it does not leak into
            # other tests running in the same interpreter.
            if hidden_lzma is not None:
                sys.modules['lzma'] = hidden_lzma
186
|
|
|
|
187
|
|
|
|
188
|
|
|
# Run every test case in this module when the file is executed directly.
if __name__ == '__main__':
    unittest.main()
190
|
|
|
|