Completed
Pull Request — master (#100)
by Chris
11:24
created

BigListOfNaughtyStringsTestCases.fuzz_test_blns()   D

Complexity

Conditions 12

Size

Total Lines 23
Code Lines 21

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 12
eloc 21
nop 1
dl 0
loc 23
rs 4.8
c 0
b 0
f 0

How to fix   Complexity   

Complexity

Complex classes like tests.fuzz.fuzz_test_phonetic.BigListOfNaughtyStringsTestCases.fuzz_test_blns() often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
# -*- coding: utf-8 -*-
2
3
# Copyright 2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19
"""abydos.tests.fuzz.test_phonetic.
20
21
This module contains fuzz tests for abydos.phonetic
22
"""
23
24
import codecs
25
import os
26
import random
27
import unittest
28
29
from abydos.phonetic import alpha_sis, bmpm, caverphone, davidson, \
30
    dm_soundex, dolby, double_metaphone, eudex, fonem, fuzzy_soundex, \
31
    haase_phonetik, henry_early, koelner_phonetik, koelner_phonetik_alpha, \
32
    koelner_phonetik_num_to_alpha, lein, metaphone, metasoundex, mra, \
33
    norphone, nysiis, onca, parmar_kumbharana, phonem, phonet, \
34
    phonetic_spanish, phonex, phonix, pshp_soundex_first, pshp_soundex_last, \
35
    refined_soundex, reth_schek_phonetik, roger_root, russell_index, \
36
    russell_index_alpha, russell_index_num_to_alpha, sfinxbis, sound_d, \
37
    soundex, spanish_metaphone, spfc, statistics_canada
38
39
from . import fuzz, random_char
40
41
# Dispatch table of every phonetic algorithm variant exercised by the fuzz
# tests. Each key is a label used in failure messages; each value is a
# callable that takes a single string. Lambdas are used only where a variant
# needs non-default arguments or chains two calls; plain function references
# are used everywhere else.
algorithms = {
    'russell_index': russell_index,
    'russell_index_num_to_alpha':
        lambda name: russell_index_num_to_alpha(russell_index(name)),
    'russell_index_alpha': russell_index_alpha,
    'soundex': soundex,
    'reverse_soundex': lambda name: soundex(name, reverse=True),
    'soundex_0pad_ml6':
        lambda name: soundex(name, zero_pad=True, maxlength=6),
    'soundex_special': lambda name: soundex(name, var='special'),
    'soundex_census': lambda name: soundex(name, var='Census'),
    'refined_soundex': refined_soundex,
    'refined_soundex_vowels':
        lambda name: refined_soundex(name, retain_vowels=True),
    'refined_soundex_0pad_ml6':
        lambda name: refined_soundex(name, zero_pad=True, maxlength=6),
    'dm_soundex': dm_soundex,
    'koelner_phonetik': koelner_phonetik,
    'koelner_phonetik_num_to_alpha':
        lambda name: koelner_phonetik_num_to_alpha(koelner_phonetik(name)),
    'koelner_phonetik_alpha': koelner_phonetik_alpha,
    'nysiis': nysiis,
    'nysiis_modified': lambda name: nysiis(name, modified=True),
    'nysiis_ml_inf': lambda name: nysiis(name, maxlength=float('inf')),
    'mra': mra,
    'metaphone': metaphone,
    'double_metaphone': double_metaphone,
    'caverphone_1': lambda name: caverphone(name, version=1),
    'caverphone_2': caverphone,
    'alpha_sis': alpha_sis,
    'fuzzy_soundex': fuzzy_soundex,
    'fuzzy_soundex_0pad_ml8':
        lambda name: fuzzy_soundex(name, maxlength=8, zero_pad=True),
    'phonex': phonex,
    'phonex_0pad_ml6':
        lambda name: phonex(name, maxlength=6, zero_pad=True),
    'phonem': phonem,
    'phonix': phonix,
    'phonix_0pad_ml6':
        lambda name: phonix(name, maxlength=6, zero_pad=True),
    'sfinxbis': sfinxbis,
    'sfinxbis_ml6': lambda name: sfinxbis(name, maxlength=6),
    'phonet_1': phonet,
    'phonet_2': lambda name: phonet(name, mode=2),
    'phonet_1_none': lambda name: phonet(name, lang='none'),
    'phonet_2_none': lambda name: phonet(name, mode=2, lang='none'),
    # spfc is fed the word twice, joined by a space.
    'spfc': lambda name: spfc(' '.join((name, name))),
    'statistics_canada': statistics_canada,
    'statistics_canada_ml8':
        lambda name: statistics_canada(name, maxlength=8),
    'lein': lein,
    'lein_nopad_ml8':
        lambda name: lein(name, maxlength=8, zero_pad=False),
    'roger_root': roger_root,
    'roger_root_nopad_ml8':
        lambda name: roger_root(name, maxlength=8, zero_pad=False),
    'onca': onca,
    'onca_nopad_ml8':
        lambda name: onca(name, maxlength=8, zero_pad=False),
    'eudex': eudex,
    'haase_phonetik': haase_phonetik,
    'haase_phonetik_primary':
        lambda name: haase_phonetik(name, primary_only=True)[:1],
    'reth_schek_phonetik': reth_schek_phonetik,
    'fonem': fonem,
    'parmar_kumbharana': parmar_kumbharana,
    'davidson': davidson,
    'sound_d': sound_d,
    'sound_d_ml8': lambda name: sound_d(name, maxlength=8),
    'pshp_soundex_last': pshp_soundex_last,
    'pshp_soundex_last_german':
        lambda name: pshp_soundex_last(name, german=True),
    'pshp_soundex_last_ml8':
        lambda name: pshp_soundex_last(name, maxlength=8),
    'pshp_soundex_first': pshp_soundex_first,
    'pshp_soundex_first_german':
        lambda name: pshp_soundex_first(name, german=True),
    'pshp_soundex_first_ml8':
        lambda name: pshp_soundex_first(name, maxlength=8),
    'henry_early': henry_early,
    'henry_early_ml8': lambda name: henry_early(name, maxlength=8),
    'norphone': norphone,
    'dolby': dolby,
    'dolby_ml4': lambda name: dolby(name, maxlength=4),
    'dolby_vowels': lambda name: dolby(name, keep_vowels=True),
    'phonetic_spanish': phonetic_spanish,
    'phonetic_spanish_ml4':
        lambda name: phonetic_spanish(name, maxlength=4),
    'spanish_metaphone': spanish_metaphone,
    'spanish_metaphone_modified':
        lambda name: spanish_metaphone(name, modified=True),
    'spanish_metaphone_ml4':
        lambda name: spanish_metaphone(name, maxlength=4),
    'metasoundex': metasoundex,
    'metasoundex_es': lambda name: metasoundex(name, language='es'),
    'bmpm': bmpm,
}
141
142
143
# Absolute directory containing this test module. Resolving it with abspath
# keeps paths built from TESTDIR valid even when __file__ is a bare relative
# filename (dirname would then be '' and '/...' suffixes would point at the
# filesystem root).
TESTDIR = os.path.dirname(os.path.abspath(__file__))

EXTREME_TEST = False  # Set to True to test EVERY single case (NB: takes hours)

# An EXTREME_TEST marker file next to this module, or one directory above it,
# switches EXTREME_TEST mode on without editing the source.
EXTREME_TEST = EXTREME_TEST or any(
    os.path.isfile(os.path.join(TESTDIR, *parts))
    for parts in (('EXTREME_TEST',), (os.pardir, 'EXTREME_TEST'))
)
153
154
155
class BigListOfNaughtyStringsTestCases(unittest.TestCase):
    """Test each phonetic algorithm against the BLNS set.

    Here, we test each algorithm against each string, but we only care that it
    does not result in an exception.

    While not actually a fuzz test, this does serve the purpose of looking for
    errors resulting from unanticipated input.
    """

    @staticmethod
    def _parse_blns(lines):
        """Parse BLNS lines into ``(bmpm_omit, string)`` pairs.

        :param lines: an iterable of text lines, with or without trailing
            line terminators
        :returns: a list of ``(bmpm_omit, string)`` tuples, one per line that
            is neither empty nor starts with ``#``. ``bmpm_omit`` is True for
            strings that lie between a line mentioning 'Script Injection' and
            the next line mentioning 'SQL Injection', and for strings that
            split into more than five whitespace-separated tokens.
        """
        blns = []
        omit_section = False
        for line in lines:
            # Strip only the line terminator; slicing off the last character
            # would eat real data when the final line has no newline.
            line = line.rstrip('\r\n')
            if 'Script Injection' in line:
                omit_section = True
            if 'SQL Injection' in line:
                omit_section = False
            if line and line[0] != '#':
                bmpm_omit = omit_section or len(line.split()) > 5
                blns.append((bmpm_omit, line))
        return blns

    def fuzz_test_blns(self):
        """Test each phonetic algorithm against the BLNS set.

        Fails the test as soon as any algorithm raises on any string. Strings
        flagged ``bmpm_omit`` are not passed to algorithms whose label
        contains 'bmpm'.
        """
        with codecs.open(TESTDIR+'/corpora/blns.txt', encoding='UTF-8') as nsf:
            blns = self._parse_blns(nsf)

        for algo, func in algorithms.items():
            is_bmpm = 'bmpm' in algo
            for bmpm_omit, ns in blns:
                if is_bmpm and bmpm_omit:
                    continue
                try:
                    func(ns)
                # Any exception at all is the failure condition under test,
                # so the broad catch is deliberate.
                except Exception as inst:
                    self.fail('Exception "{}" thrown by {} for BLNS: {}'
                              .format(inst, algo, ns))
188
189
190
class FuzzedWordsTestCases(unittest.TestCase):
    """Test each phonetic algorithm against the base words set.

    Every test only checks that no algorithm raises an exception; return
    values are ignored. Outside EXTREME_TEST mode each generated word is run
    through five randomly chosen algorithms (chosen with replacement); in
    EXTREME_TEST mode it is run through all of them.
    """

    # Number of words generated per fuzz test (100x larger in EXTREME_TEST
    # mode).
    reps = 100000 * (100 if EXTREME_TEST else 1)

    # Non-empty lines of corpora/basewords.txt, loaded once when the class
    # body is executed.
    basewords = []
    with codecs.open(TESTDIR + '/corpora/basewords.txt',
                     encoding='UTF-8') as basewords_file:
        for line in basewords_file:
            # Strip only the terminator so an unterminated final line does
            # not lose its last character.
            line = line.rstrip('\r\n')
            if line:
                basewords.append(line)

    @staticmethod
    def _random_string(*char_args):
        """Return 8 to 16 characters drawn via ``random_char(*char_args)``."""
        length = random.randint(8, 16)
        return ''.join(random_char(*char_args) for _ in range(length))

    @staticmethod
    def _select_algorithms():
        """Return the algorithm labels to run against one generated word."""
        names = list(algorithms)
        if EXTREME_TEST:
            return names
        return random.choices(names, k=5)

    def _try_algorithm(self, algo, word, bmpm_limit=None):
        """Run one algorithm on one word, failing the test if it raises.

        :param algo: a key of the module-level ``algorithms`` table
        :param word: the string to encode
        :param bmpm_limit: when not None, algorithms whose label contains
            'bmpm' are skipped for words longer than this many characters
        """
        if (bmpm_limit is not None and 'bmpm' in algo and
                len(word) > bmpm_limit):
            return
        try:
            algorithms[algo](word)
        # Any exception at all is the failure condition under test, so the
        # broad catch is deliberate.
        except Exception as inst:
            self.fail('Exception "{}" thrown by {} for word: {}'
                      .format(inst, algo, word))

    def _fuzz_loop(self, make_word, bmpm_limit=None):
        """Generate ``reps`` words and run the selected algorithms on each.

        :param make_word: zero-argument callable returning the next word
        :param bmpm_limit: forwarded to :meth:`_try_algorithm`
        """
        for _ in range(self.reps):
            # The word is generated before the algorithms are selected.
            fuzzed = make_word()
            for algo in self._select_algorithms():
                self._try_algorithm(algo, fuzzed, bmpm_limit)

    def fuzz_test_base(self):
        """Test each phonetic algorithm against the unfuzzed base words."""
        for algo in algorithms:
            for word in self.basewords:
                self._try_algorithm(algo, word, bmpm_limit=12)

    def fuzz_test_20pct(self):
        """Fuzz test phonetic algorithms against 20% fuzzed words."""
        self._fuzz_loop(
            lambda: fuzz(random.choice(self.basewords), fuzziness=0.2),
            bmpm_limit=12)

    def fuzz_test_100pct(self):
        """Fuzz test phonetic algorithms against 100% fuzzed words."""
        self._fuzz_loop(
            lambda: fuzz(random.choice(self.basewords), fuzziness=1),
            bmpm_limit=12)

    def fuzz_test_fuzz_bmp(self):
        """Fuzz test phonetic algorithms against BMP fuzz."""
        self._fuzz_loop(lambda: self._random_string(0xffff))

    def fuzz_test_fuzz_bmpsmp_letter(self):
        """Fuzz test phonetic algorithms against alphabetic BMP+SMP fuzz."""
        self._fuzz_loop(lambda: self._random_string(0x1ffff, ' LETTER '))

    def fuzz_test_fuzz_bmpsmp_latin(self):
        """Fuzz test phonetic algorithms against Latin BMP+SMP fuzz."""
        self._fuzz_loop(lambda: self._random_string(0x1ffff, 'LATIN '))

    def fuzz_test_fuzz_unicode(self):
        """Fuzz test phonetic algorithms against valid Unicode fuzz."""
        self._fuzz_loop(lambda: self._random_string())
321
322
323
if __name__ == '__main__':
    # Hand control to unittest's command-line runner when executed directly.
    unittest.main()
325