| Total Complexity | 42 |
| Total Lines | 226 |
| Duplicated Lines | 44.25 % |
| Changes | 0 |
Duplicate code is one of the most pungent code smells. A common rule of thumb is to restructure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
Complex classes like tests.fuzz.fuzz_test_fingerprint often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
| 1 | # -*- coding: utf-8 -*- |
||
| 2 | |||
| 3 | # Copyright 2018 by Christopher C. Little. |
||
| 4 | # This file is part of Abydos. |
||
| 5 | # |
||
| 6 | # Abydos is free software: you can redistribute it and/or modify |
||
| 7 | # it under the terms of the GNU General Public License as published by |
||
| 8 | # the Free Software Foundation, either version 3 of the License, or |
||
| 9 | # (at your option) any later version. |
||
| 10 | # |
||
| 11 | # Abydos is distributed in the hope that it will be useful, |
||
| 12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
| 13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||
| 14 | # GNU General Public License for more details. |
||
| 15 | # |
||
| 16 | # You should have received a copy of the GNU General Public License |
||
| 17 | # along with Abydos. If not, see <http://www.gnu.org/licenses/>. |
||
| 18 | |||
| 19 | """abydos.tests.fuzz.test_fingerprint. |
||
| 20 | |||
| 21 | This module contains fuzz tests for abydos.fingerprint |
||
| 22 | """ |
||
| 23 | |||
| 24 | import codecs |
||
| 25 | import os |
||
| 26 | import random |
||
| 27 | import unittest |
||
| 28 | |||
| 29 | from abydos.fingerprint import count_fingerprint, occurrence_fingerprint, \ |
||
| 30 | occurrence_halved_fingerprint, omission_key, phonetic_fingerprint, \ |
||
| 31 | position_fingerprint, qgram_fingerprint, skeleton_key, str_fingerprint, \ |
||
| 32 | synoname_toolcode |
||
| 33 | |||
| 34 | from . import fuzz, random_char |
||
| 35 | |||
# Registry of the fingerprint callables exercised by the tests in this module,
# keyed by the label that appears in failure messages.  Every value is called
# with a single string; the lambdas pin extra arguments of the wrapped
# function.
algorithms = {
    'str_fingerprint': str_fingerprint,
    'qgram_fingerprint': qgram_fingerprint,
    'qgram_fingerprint_3': lambda name: qgram_fingerprint(name, qval=3),
    'qgram_fingerprint_ssj': lambda name: qgram_fingerprint(
        name, start_stop='$#', joiner=' '
    ),
    'phonetic_fingerprint': phonetic_fingerprint,
    'skeleton_key': skeleton_key,
    'omission_key': omission_key,
    'occurrence_fingerprint': occurrence_fingerprint,
    'occurrence_halved_fingerprint': occurrence_halved_fingerprint,
    'count_fingerprint': count_fingerprint,
    'position_fingerprint': position_fingerprint,
    'synoname_toolcode': synoname_toolcode,
    # Same string supplied for both positional arguments.
    'synoname_toolcode_2name': lambda name: synoname_toolcode(name, name),
}
| 53 | |||
# Directory containing this test module.  It is made absolute so that paths
# built from it stay anchored here even when ``__file__`` is a bare relative
# filename, whose dirname would otherwise be the empty string (turning
# ``TESTDIR + '/...'`` into a path under the filesystem root).
TESTDIR = os.path.dirname(os.path.abspath(__file__))

EXTREME_TEST = False  # Set to True to test EVERY single case (NB: takes hours)

# A marker file named EXTREME_TEST, located either in this directory or in its
# parent, also switches the module to EXTREME_TEST mode.
if not EXTREME_TEST and any(
    os.path.isfile(os.path.join(TESTDIR, *parts))
    for parts in (('EXTREME_TEST',), (os.pardir, 'EXTREME_TEST'))
):
    # EXTREME_TEST file detected -- switching to EXTREME_TEST mode...
    EXTREME_TEST = True
| 64 | |||
| 65 | |||
class BigListOfNaughtyStringsTestCases(unittest.TestCase):
    """Test each fingerprint algorithm against the BLNS set.

    Here, we test each algorithm against each string, but we only care that it
    does not result in an exception.

    While not actually a fuzz test, this does serve the purpose of looking for
    errors resulting from unanticipated input.
    """

    def test_blns(self):
        """Test each fingerprint algorithm against the BLNS set.

        Reads ``corpora/blns.txt`` under ``TESTDIR``, skips blank lines and
        lines starting with ``#``, and passes every remaining line to every
        entry of ``algorithms``.  The test fails on the first exception.
        """
        blns = []
        corpus = os.path.join(TESTDIR, 'corpora', 'blns.txt')
        with codecs.open(corpus, encoding='UTF-8') as nsf:
            for line in nsf:
                # Drop the line terminator only when one is present; slicing
                # off the last character unconditionally would truncate a
                # final line that has no trailing newline.
                line = line.splitlines()[0] if line else ''
                if line and line[0] != '#':
                    blns.append(line)

        for algo in algorithms:
            for ns in blns:
                try:
                    algorithms[algo](ns)
                except Exception as inst:
                    self.fail('Exception "{}" thrown by {} for BLNS: {}'
                              .format(inst, algo, ns))
| 92 | |||
| 93 | |||
class FuzzedWordsTestCases(unittest.TestCase):
    """Test each fingerprint algorithm against the base words set.

    Every test only checks that the algorithms do not raise; return values
    are ignored.

    NOTE(review): the methods use a ``fuzz_test_`` prefix rather than
    ``test_``, so they are presumably collected by a dedicated runner --
    confirm against the project's test configuration.
    """

    # Number of fuzzed inputs generated per test; 100 times more in
    # EXTREME_TEST mode.
    reps = 100000 * (100 if EXTREME_TEST else 1)

    # Non-empty lines of corpora/basewords.txt, loaded once when the class
    # body is executed.
    basewords = []
    with codecs.open(os.path.join(TESTDIR, 'corpora', 'basewords.txt'),
                     encoding='UTF-8') as basewords_file:
        for line in basewords_file:
            # Drop the line terminator only when one is present; slicing off
            # the last character unconditionally would truncate a final line
            # that has no trailing newline.
            line = line.splitlines()[0] if line else ''
            if line:
                basewords.append(line)

    @staticmethod
    def _random_string(*char_args):
        """Return 8 to 16 characters drawn via ``random_char(*char_args)``."""
        return ''.join(random_char(*char_args)
                       for _ in range(random.randint(8, 16)))

    def _fuzz_test(self, generate):
        """Run ``self.reps`` rounds of fuzzing and fail on any exception.

        Parameters
        ----------
        generate : callable
            Zero-argument callable returning the next fuzzed string.

        In EXTREME_TEST mode every algorithm sees every generated string;
        otherwise five algorithms are drawn at random (with replacement) for
        each string.
        """
        names = list(algorithms)
        for _ in range(self.reps):
            fuzzed = generate()

            if EXTREME_TEST:
                algs = names
            else:
                algs = random.choices(names, k=5)

            for algo in algs:
                try:
                    algorithms[algo](fuzzed)
                except Exception as inst:
                    self.fail('Exception "{}" thrown by {} for word: {}'
                              .format(inst, algo, fuzzed))

    def fuzz_test_base(self):
        """Test each fingerprint algorithm against the unfuzzed base words."""
        for algo in algorithms:
            for word in self.basewords:
                try:
                    algorithms[algo](word)
                except Exception as inst:
                    self.fail('Exception "{}" thrown by {} for word: {}'
                              .format(inst, algo, word))

    def fuzz_test_20pct(self):
        """Fuzz test fingerprint algorithms against 20% fuzzed words."""
        self._fuzz_test(
            lambda: fuzz(random.choice(self.basewords), fuzziness=0.2))

    def fuzz_test_100pct(self):
        """Fuzz test fingerprint algorithms against 100% fuzzed words."""
        self._fuzz_test(
            lambda: fuzz(random.choice(self.basewords), fuzziness=1))

    def fuzz_test_fuzz_bmp(self):
        """Fuzz test fingerprint algorithms against BMP fuzz."""
        self._fuzz_test(lambda: self._random_string(0xffff))

    def fuzz_test_fuzz_bmpsmp_letter(self):
        """Fuzz test fingerprint algorithms against alphabetic BMP+SMP fuzz."""
        self._fuzz_test(lambda: self._random_string(0x1ffff, ' LETTER '))

    def fuzz_test_fuzz_bmpsmp_latin(self):
        """Fuzz test fingerprint algorithms against Latin BMP+SMP fuzz."""
        self._fuzz_test(lambda: self._random_string(0x1ffff, 'LATIN '))

    def fuzz_test_fuzz_unicode(self):
        """Fuzz test fingerprint algorithms against valid Unicode fuzz."""
        self._fuzz_test(lambda: self._random_string())
| 222 | |||
| 223 | |||
# Hand control to the unittest command-line runner when this module is
# executed as a script.
if __name__ == '__main__':
    unittest.main()
| 226 |