Completed
Branch master (87ccc1)
by Chris
08:42
created

tests.fuzz.fuzz_test_fingerprint   A

Complexity

Total Complexity 42

Size/Duplication

Total Lines 216
Duplicated Lines 46.3 %

Importance

Changes 0
Metric Value
eloc 138
dl 100
loc 216
rs 9.0399
c 0
b 0
f 0
wmc 42

How to fix   Duplicated Code    Complexity   

Duplicated Code

Duplicate code is one of the most pungent code smells. A commonly used rule is to restructure code once it is duplicated in three or more places.

Common duplication problems and their corresponding solutions are:

Complexity

 Tip:   Before tackling complexity, make sure that you eliminate any duplication first. Doing so can often reduce the size of classes significantly.

Complex classes like tests.fuzz.fuzz_test_fingerprint often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
# -*- coding: utf-8 -*-
2
3
# Copyright 2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19
"""abydos.tests.fuzz.test_fingerprint.
20
21
This module contains fuzz tests for abydos.fingerprint
22
"""
23
24
import codecs
25
import unittest
26
from random import choice, randint, sample
27
28
from abydos.fingerprint.basic import phonetic_fingerprint, qgram_fingerprint, \
29
    str_fingerprint
30
from abydos.fingerprint.lightweight import count_fingerprint, \
31
    occurrence_fingerprint, occurrence_halved_fingerprint, position_fingerprint
32
from abydos.fingerprint.speedcop import omission_key, skeleton_key
33
from abydos.fingerprint.synoname import synoname_toolcode
34
35
from . import EXTREME_TEST, _corpus_file, _fuzz, _random_char
36
37
# Registry of the fingerprint callables exercised by the tests below, keyed by
# the name used in failure messages. Each value is called with a single
# string argument; the lambdas pin extra arguments for variant configurations.
algorithms = {
    'str_fingerprint': str_fingerprint,
    'qgram_fingerprint': qgram_fingerprint,
    'qgram_fingerprint_3': lambda name: qgram_fingerprint(name, qval=3),
    'qgram_fingerprint_ssj': lambda name: qgram_fingerprint(
        name, start_stop='$#', joiner=' '
    ),
    'phonetic_fingerprint': phonetic_fingerprint,
    'skeleton_key': skeleton_key,
    'omission_key': omission_key,
    'occurrence_fingerprint': occurrence_fingerprint,
    'occurrence_halved_fingerprint': occurrence_halved_fingerprint,
    'count_fingerprint': count_fingerprint,
    'position_fingerprint': position_fingerprint,
    'synoname_toolcode': synoname_toolcode,
    # The two-name variant passes the same string as both arguments.
    'synoname_toolcode_2name': lambda name: synoname_toolcode(name, name),
}
54
55
56
class BigListOfNaughtyStringsTestCases(unittest.TestCase):
    """Test each fingerprint algorithm against the BLNS set.

    Here, we test each algorithm against each string, but we only care that it
    does not result in an exception.

    While not actually a fuzz test, this does serve the purpose of looking for
    errors resulting from unanticipated input.
    """

    def test_blns(self):
        """Test each fingerprint algorithm against the BLNS set.

        Reads ``blns.txt`` from the corpus directory, skips blank lines and
        lines starting with ``#``, and calls every entry of ``algorithms``
        on every remaining string. The test fails on the first exception
        raised, reporting the exception, the algorithm name and the string.
        """
        blns = []
        with codecs.open(_corpus_file('blns.txt'), encoding='UTF-8') as nsf:
            for line in nsf:
                # Strip only the line terminator: unconditionally dropping
                # the last character would truncate a final line that has no
                # trailing newline and leave a stray '\r' on CRLF files.
                line = line.rstrip('\r\n')
                if line and not line.startswith('#'):
                    blns.append(line)

        for algo, algorithm in algorithms.items():
            for ns in blns:
                try:
                    algorithm(ns)
                except Exception as inst:
                    # Any exception is a test failure; report what raised it.
                    self.fail('Exception "{}" thrown by {} for BLNS: {}'
                              .format(inst, algo, ns))
82
83
84
class FuzzedWordsTestCases(unittest.TestCase):
    """Test each fingerprint algorithm against the base words set.

    Every test here only checks that the algorithm under test does not raise
    an exception; return values are ignored.
    """

    # Iterations per fuzz test; multiplied by 10000 when EXTREME_TEST is set.
    reps = 1000 * (10000 if EXTREME_TEST else 1)

    # Non-empty lines of the base words corpus, loaded once when the class
    # body is executed.
    basewords = []
    with codecs.open(_corpus_file('basewords.txt'),
                     encoding='UTF-8') as basewords_file:
        for line in basewords_file:
            # Strip only the line terminator: unconditionally dropping the
            # last character would truncate a final line that has no trailing
            # newline and leave a stray '\r' on CRLF files.
            line = line.rstrip('\r\n')
            if line:
                basewords.append(line)

    def _run_algorithm(self, algo, word):
        """Call one algorithm on one word, failing the test if it raises.

        :param str algo: key into the module-level ``algorithms`` mapping
        :param str word: the input passed to the algorithm
        """
        try:
            algorithms[algo](word)
        except Exception as inst:
            # Any exception is a test failure; report what raised it.
            self.fail('Exception "{}" thrown by {} for word: {}'
                      .format(inst, algo, word))

    def _fuzz_algorithms(self, make_input):
        """Run ``reps`` rounds of algorithms against generated input.

        Each round first generates a fresh input, then runs it through every
        algorithm when EXTREME_TEST is set, or through a random sample of
        five algorithms otherwise.

        :param make_input: zero-argument callable returning the string to
            test in one round
        """
        names = list(algorithms.keys())
        for _ in range(self.reps):
            fuzzed = make_input()

            if EXTREME_TEST:
                algs = names
            else:
                algs = sample(names, k=5)  # noqa: S311

            for algo in algs:
                self._run_algorithm(algo, fuzzed)

    @staticmethod
    def _random_string(*char_args):
        """Join between 8 and 16 ``_random_char`` results into one string.

        :param char_args: positional arguments forwarded unchanged to each
            ``_random_char`` call
        :returns: the joined string
        """
        return ''.join(_random_char(*char_args) for _ in
                       range(randint(8, 16)))  # noqa: S311

    def fuzz_test_base(self):
        """Test each fingerprint algorithm against the unfuzzed base words."""
        for algo in algorithms:
            for word in self.basewords:
                self._run_algorithm(algo, word)

    def fuzz_test_20pct(self):
        """Fuzz test fingerprint algorithms against 20% fuzzed words."""
        self._fuzz_algorithms(
            lambda: _fuzz(choice(self.basewords),  # noqa: S311
                          fuzziness=0.2))

    def fuzz_test_100pct(self):
        """Fuzz test fingerprint algorithms against 100% fuzzed words."""
        self._fuzz_algorithms(
            lambda: _fuzz(choice(self.basewords),  # noqa: S311
                          fuzziness=1))

    def fuzz_test_fuzz_bmp(self):
        """Fuzz test fingerprint algorithms against BMP fuzz."""
        self._fuzz_algorithms(lambda: self._random_string(0xffff))

    def fuzz_test_fuzz_bmpsmp_letter(self):
        """Fuzz test fingerprint algorithms against alphabetic BMP+SMP fuzz."""
        self._fuzz_algorithms(
            lambda: self._random_string(0x1ffff, ' LETTER '))

    def fuzz_test_fuzz_bmpsmp_latin(self):
        """Fuzz test fingerprint algorithms against Latin BMP+SMP fuzz."""
        self._fuzz_algorithms(
            lambda: self._random_string(0x1ffff, 'LATIN '))

    def fuzz_test_fuzz_unicode(self):
        """Fuzz test fingerprint algorithms against valid Unicode fuzz."""
        self._fuzz_algorithms(lambda: self._random_string())
212
213
214
# Run the module's test cases through unittest when executed as a script.
if __name__ == '__main__':
    unittest.main()
216