Completed
Pull Request — master (#100)
by Chris
11:24
created

FuzzedWordsTestCases.fuzz_test_fuzz_bmp()   A

Complexity

Conditions 5

Size

Total Lines 17
Code Lines 13

Duplication

Lines 17
Ratio 100 %

Importance

Changes 0
Metric Value
cc 5
eloc 13
nop 1
dl 17
loc 17
rs 9.2833
c 0
b 0
f 0
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19
"""abydos.tests.fuzz.test_fingerprint.
20
21
This module contains fuzz tests for abydos.fingerprint
22
"""
23
24
import codecs
25
import os
26
import random
27
import unittest
28
29
from abydos.fingerprint import count_fingerprint, occurrence_fingerprint, \
30
    occurrence_halved_fingerprint, omission_key, phonetic_fingerprint, \
31
    position_fingerprint, qgram_fingerprint, skeleton_key, str_fingerprint, \
32
    synoname_toolcode
33
34
from . import fuzz, random_char
35
36
# Registry of the callables exercised by every test in this module.
# Each key is the label used in failure messages; each value accepts a single
# string. Lambdas pin the extra arguments for a particular parameterization.
algorithms = {
    'str_fingerprint': str_fingerprint,
    'qgram_fingerprint': qgram_fingerprint,
    'qgram_fingerprint_3': lambda name: qgram_fingerprint(name, qval=3),
    'qgram_fingerprint_ssj': lambda name: qgram_fingerprint(
        name, start_stop='$#', joiner=' '
    ),
    'phonetic_fingerprint': phonetic_fingerprint,
    'skeleton_key': skeleton_key,
    'omission_key': omission_key,
    'occurrence_fingerprint': occurrence_fingerprint,
    'occurrence_halved_fingerprint': occurrence_halved_fingerprint,
    'count_fingerprint': count_fingerprint,
    'position_fingerprint': position_fingerprint,
    'synoname_toolcode': synoname_toolcode,
    # Two-name variant: the same string is passed as both arguments.
    'synoname_toolcode_2name': lambda name: synoname_toolcode(name, name),
}
53
54
# Directory containing this test module. It is made absolute so that paths
# built from it stay valid even when __file__ is a bare relative filename
# (in which case dirname() alone would return an empty string).
TESTDIR = os.path.dirname(os.path.abspath(__file__))

EXTREME_TEST = False  # Set to True to test EVERY single case (NB: takes hours)

if not EXTREME_TEST:
    # A marker file named EXTREME_TEST, either next to this module or in its
    # parent directory, also switches EXTREME_TEST mode on.
    EXTREME_TEST = any(
        os.path.isfile(os.path.join(TESTDIR, subdir, 'EXTREME_TEST'))
        for subdir in ('', os.pardir)
    )
64
65
66
class BigListOfNaughtyStringsTestCases(unittest.TestCase):
67
    """Test each fingerprint algorithm against the BLNS set.
68
69
    Here, we test each algorithm against each string, but we only care that it
70
    does not result in an exception.
71
72
    While not actually a fuzz test, this does serve the purpose of looking for
73
    errors resulting from unanticipated input.
74
    """
75
76
    def test_blns(self):
77
        """Test each fingerprint algorithm against the BLNS set."""
78
        blns = []
79
        with codecs.open(TESTDIR+'/corpora/blns.txt', encoding='UTF-8') as nsf:
80
            for line in nsf:
81
                line = line[:-1]
82
                if line and line[0] != '#':
83
                    blns.append(line)
84
85
        for algo in algorithms:
86
            for ns in blns:
87
                try:
88
                    algorithms[algo](ns)
89
                except Exception as inst:
90
                    self.fail('Exception "{}" thrown by {} for BLNS: {}'
91
                              .format(inst, algo, ns))
92
93
94
class FuzzedWordsTestCases(unittest.TestCase):
    """Test each fingerprint algorithm against the base words set.

    Every test only checks that no algorithm raises an exception; the
    returned fingerprints themselves are not inspected.
    """

    # Number of inputs generated per fuzz test (100x more in EXTREME_TEST mode)
    reps = 100000 * (100 if EXTREME_TEST else 1)

    # Non-empty lines of the base words corpus, read once at class definition
    basewords = []
    with codecs.open(os.path.join(TESTDIR, 'corpora', 'basewords.txt'),
                     encoding='UTF-8') as basewords_file:
        for line in basewords_file:
            # Drop only a real newline: the final line of the file may not
            # have one, and its last character must be preserved.
            if line.endswith('\n'):
                line = line[:-1]
            if line:
                basewords.append(line)

    def _check_algorithm(self, algo, word):
        """Fail the test if the algorithm named ``algo`` raises on ``word``.

        :param str algo: key into the module-level ``algorithms`` dict
        :param str word: the input handed to the algorithm
        """
        try:
            algorithms[algo](word)
        except Exception as inst:
            self.fail('Exception "{}" thrown by {} for word: {}'
                      .format(inst, algo, word))

    def _run_fuzz(self, make_word):
        """Run ``self.reps`` rounds of fuzzing using inputs from ``make_word``.

        In EXTREME_TEST mode every algorithm sees every generated input;
        otherwise 5 algorithms are drawn at random (with replacement) for
        each input.

        :param make_word: zero-argument callable returning the next input
        """
        algo_names = list(algorithms.keys())
        for _ in range(self.reps):
            # Generate the input before picking algorithms so the order of
            # calls into the random module stays the same as before.
            fuzzed = make_word()

            if EXTREME_TEST:
                algs = algo_names
            else:
                algs = random.choices(algo_names, k=5)

            for algo in algs:
                self._check_algorithm(algo, fuzzed)

    @staticmethod
    def _random_string(*char_args):
        """Return 8 to 16 characters, each drawn from ``random_char``.

        :param char_args: positional arguments forwarded to ``random_char``
        :rtype: str
        """
        length = random.randint(8, 16)
        return ''.join(random_char(*char_args) for _ in range(length))

    def fuzz_test_base(self):
        """Test each fingerprint algorithm against the unfuzzed base words."""
        for algo in algorithms:
            for word in self.basewords:
                self._check_algorithm(algo, word)

    def fuzz_test_20pct(self):
        """Fuzz test fingerprint algorithms against 20% fuzzed words."""
        self._run_fuzz(
            lambda: fuzz(random.choice(self.basewords), fuzziness=0.2))

    def fuzz_test_100pct(self):
        """Fuzz test fingerprint algorithms against 100% fuzzed words."""
        self._run_fuzz(
            lambda: fuzz(random.choice(self.basewords), fuzziness=1))

    def fuzz_test_fuzz_bmp(self):
        """Fuzz test fingerprint algorithms against BMP fuzz."""
        self._run_fuzz(lambda: self._random_string(0xffff))

    def fuzz_test_fuzz_bmpsmp_letter(self):
        """Fuzz test fingerprint algorithms against alphabetic BMP+SMP fuzz."""
        self._run_fuzz(lambda: self._random_string(0x1ffff, ' LETTER '))

    def fuzz_test_fuzz_bmpsmp_latin(self):
        """Fuzz test fingerprint algorithms against Latin BMP+SMP fuzz."""
        self._run_fuzz(lambda: self._random_string(0x1ffff, 'LATIN '))

    def fuzz_test_fuzz_unicode(self):
        """Fuzz test fingerprint algorithms against valid Unicode fuzz."""
        self._run_fuzz(lambda: self._random_string())
222
223
224
# Run the test cases in this module when it is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
226