Total Complexity | 42 |
Total Lines | 226 |
Duplicated Lines | 44.25 % |
Changes | 0 |
Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
Complex classes like tests.fuzz.fuzz_test_fingerprint often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
1 | # -*- coding: utf-8 -*- |
||
2 | |||
3 | # Copyright 2018 by Christopher C. Little. |
||
4 | # This file is part of Abydos. |
||
5 | # |
||
6 | # Abydos is free software: you can redistribute it and/or modify |
||
7 | # it under the terms of the GNU General Public License as published by |
||
8 | # the Free Software Foundation, either version 3 of the License, or |
||
9 | # (at your option) any later version. |
||
10 | # |
||
11 | # Abydos is distributed in the hope that it will be useful, |
||
12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||
14 | # GNU General Public License for more details. |
||
15 | # |
||
16 | # You should have received a copy of the GNU General Public License |
||
17 | # along with Abydos. If not, see <http://www.gnu.org/licenses/>. |
||
18 | |||
19 | """abydos.tests.fuzz.test_fingerprint. |
||
20 | |||
21 | This module contains fuzz tests for abydos.fingerprint |
||
22 | """ |
||
23 | |||
24 | import codecs |
||
25 | import os |
||
26 | import random |
||
27 | import unittest |
||
28 | |||
29 | from abydos.fingerprint import count_fingerprint, occurrence_fingerprint, \ |
||
30 | occurrence_halved_fingerprint, omission_key, phonetic_fingerprint, \ |
||
31 | position_fingerprint, qgram_fingerprint, skeleton_key, str_fingerprint, \ |
||
32 | synoname_toolcode |
||
33 | |||
34 | from . import fuzz, random_char |
||
35 | |||
# Label -> callable taking one string. The lambdas wrap a fingerprint
# function with non-default arguments so that every entry can be invoked
# the same way, with a single positional string.
algorithms = {
    'str_fingerprint': str_fingerprint,
    'qgram_fingerprint': qgram_fingerprint,
    'qgram_fingerprint_3': lambda word: qgram_fingerprint(word, qval=3),
    'qgram_fingerprint_ssj': lambda word: qgram_fingerprint(
        word, start_stop='$#', joiner=' '
    ),
    'phonetic_fingerprint': phonetic_fingerprint,
    'skeleton_key': skeleton_key,
    'omission_key': omission_key,
    'occurrence_fingerprint': occurrence_fingerprint,
    'occurrence_halved_fingerprint': occurrence_halved_fingerprint,
    'count_fingerprint': count_fingerprint,
    'position_fingerprint': position_fingerprint,
    'synoname_toolcode': synoname_toolcode,
    # Same string supplied for both positional arguments.
    'synoname_toolcode_2name': lambda word: synoname_toolcode(word, word),
}
||
53 | |||
# Directory containing this module; corpus paths are built relative to it.
TESTDIR = os.path.dirname(__file__)

# Set to True to test EVERY single case (NB: takes hours)
EXTREME_TEST = False

if not EXTREME_TEST:
    # A marker file named EXTREME_TEST, either next to this module or one
    # directory above it, switches EXTREME_TEST mode on.
    EXTREME_TEST = any(
        os.path.isfile(TESTDIR + marker)
        for marker in ('/EXTREME_TEST', '/../EXTREME_TEST')
    )
||
64 | |||
65 | |||
66 | class BigListOfNaughtyStringsTestCases(unittest.TestCase): |
||
67 | """Test each fingerprint algorithm against the BLNS set. |
||
68 | |||
69 | Here, we test each algorithm against each string, but we only care that it |
||
70 | does not result in an exception. |
||
71 | |||
72 | While not actually a fuzz test, this does serve the purpose of looking for |
||
73 | errors resulting from unanticipated input. |
||
74 | """ |
||
75 | |||
76 | def test_blns(self): |
||
77 | """Test each fingerprint algorithm against the BLNS set.""" |
||
78 | blns = [] |
||
79 | with codecs.open(TESTDIR+'/corpora/blns.txt', encoding='UTF-8') as nsf: |
||
80 | for line in nsf: |
||
81 | line = line[:-1] |
||
82 | if line and line[0] != '#': |
||
83 | blns.append(line) |
||
84 | |||
85 | for algo in algorithms: |
||
86 | for ns in blns: |
||
87 | try: |
||
88 | algorithms[algo](ns) |
||
89 | except Exception as inst: |
||
90 | self.fail('Exception "{}" thrown by {} for BLNS: {}' |
||
91 | .format(inst, algo, ns)) |
||
92 | |||
93 | |||
class FuzzedWordsTestCases(unittest.TestCase):
    """Test each fingerprint algorithm against the base words set.

    Every test here passes as long as no algorithm raises an exception; the
    returned fingerprints themselves are not inspected.

    NOTE(review): the methods are named ``fuzz_test_*`` rather than
    ``test_*``, so unittest's default method prefix does not collect them --
    presumably they are run through a custom test pattern; confirm.
    """

    # Iterations per fuzz test; 100 times more in EXTREME_TEST mode.
    reps = 100000 * (100 if EXTREME_TEST else 1)

    # Non-empty lines of the base words corpus, read once at class creation.
    basewords = []
    with codecs.open(TESTDIR + '/corpora/basewords.txt',
                     encoding='UTF-8') as basewords_file:
        for line in basewords_file:
            # Strip only the line terminator so that a final line without a
            # trailing newline keeps its last character.
            line = line.rstrip('\r\n')
            if line:
                basewords.append(line)

    def _check(self, algo, word):
        """Call the algorithm labelled ``algo`` on ``word``; fail if it raises."""
        try:
            algorithms[algo](word)
        except Exception as inst:
            self.fail('Exception "{}" thrown by {} for word: {}'
                      .format(inst, algo, word))

    def _fuzz_test(self, make_word):
        """Run ``self.reps`` rounds against strings built by ``make_word``.

        ``make_word`` is a zero-argument callable returning the string to
        test. In EXTREME_TEST mode every algorithm is run on each string;
        otherwise five algorithms are drawn at random (with replacement).
        """
        names = list(algorithms.keys())  # loop-invariant, so built once
        for _ in range(self.reps):
            fuzzed = make_word()
            if EXTREME_TEST:
                algs = names
            else:
                algs = random.choices(names, k=5)
            for algo in algs:
                self._check(algo, fuzzed)

    @staticmethod
    def _random_string(*args):
        """Join 8 to 16 characters, each produced by ``random_char(*args)``."""
        return ''.join(random_char(*args) for _ in
                       range(0, random.randint(8, 16)))

    def fuzz_test_base(self):
        """Test each fingerprint algorithm against the unfuzzed base words."""
        for algo in algorithms:
            for word in self.basewords:
                self._check(algo, word)

    def fuzz_test_20pct(self):
        """Fuzz test fingerprint algorithms against 20% fuzzed words."""
        self._fuzz_test(
            lambda: fuzz(random.choice(self.basewords), fuzziness=0.2))

    def fuzz_test_100pct(self):
        """Fuzz test fingerprint algorithms against 100% fuzzed words."""
        self._fuzz_test(
            lambda: fuzz(random.choice(self.basewords), fuzziness=1))

    def fuzz_test_fuzz_bmp(self):
        """Fuzz test fingerprint algorithms against BMP fuzz."""
        self._fuzz_test(lambda: self._random_string(0xffff))

    def fuzz_test_fuzz_bmpsmp_letter(self):
        """Fuzz test fingerprint algorithms against alphabetic BMP+SMP fuzz."""
        self._fuzz_test(lambda: self._random_string(0x1ffff, ' LETTER '))

    def fuzz_test_fuzz_bmpsmp_latin(self):
        """Fuzz test fingerprint algorithms against Latin BMP+SMP fuzz."""
        self._fuzz_test(lambda: self._random_string(0x1ffff, 'LATIN '))

    def fuzz_test_fuzz_unicode(self):
        """Fuzz test fingerprint algorithms against valid Unicode fuzz."""
        self._fuzz_test(lambda: self._random_string())
||
222 | |||
223 | |||
# Run this module's test cases when it is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
||
226 |