| Total Complexity | 42 |
| Total Lines | 216 |
| Duplicated Lines | 46.3 % |
| Changes | 0 | ||
Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
Complex classes like tests.fuzz.fuzz_test_fingerprint often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
| 1 | # -*- coding: utf-8 -*- |
||
| 2 | |||
| 3 | # Copyright 2018 by Christopher C. Little. |
||
| 4 | # This file is part of Abydos. |
||
| 5 | # |
||
| 6 | # Abydos is free software: you can redistribute it and/or modify |
||
| 7 | # it under the terms of the GNU General Public License as published by |
||
| 8 | # the Free Software Foundation, either version 3 of the License, or |
||
| 9 | # (at your option) any later version. |
||
| 10 | # |
||
| 11 | # Abydos is distributed in the hope that it will be useful, |
||
| 12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
| 13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||
| 14 | # GNU General Public License for more details. |
||
| 15 | # |
||
| 16 | # You should have received a copy of the GNU General Public License |
||
| 17 | # along with Abydos. If not, see <http://www.gnu.org/licenses/>. |
||
| 18 | |||
| 19 | """abydos.tests.fuzz.test_fingerprint. |
||
| 20 | |||
| 21 | This module contains fuzz tests for abydos.fingerprint |
||
| 22 | """ |
||
| 23 | |||
| 24 | import codecs |
||
| 25 | import unittest |
||
| 26 | from random import choice, randint, sample |
||
| 27 | |||
| 28 | from abydos.fingerprint.basic import phonetic_fingerprint, qgram_fingerprint, \ |
||
| 29 | str_fingerprint |
||
| 30 | from abydos.fingerprint.lightweight import count_fingerprint, \ |
||
| 31 | occurrence_fingerprint, occurrence_halved_fingerprint, position_fingerprint |
||
| 32 | from abydos.fingerprint.speedcop import omission_key, skeleton_key |
||
| 33 | from abydos.fingerprint.synoname import synoname_toolcode |
||
| 34 | |||
| 35 | from . import EXTREME_TEST, _corpus_file, _fuzz, _random_char |
||
| 36 | |||
# Registry of the fingerprint callables exercised by the tests below, keyed by
# a label that is reported in failure messages. Each value accepts a single
# string argument; the lambdas pin extra arguments for variant configurations.
algorithms = {
    # abydos.fingerprint.basic
    'str_fingerprint': str_fingerprint,
    'qgram_fingerprint': qgram_fingerprint,
    'qgram_fingerprint_3': lambda name: qgram_fingerprint(name, qval=3),
    'qgram_fingerprint_ssj': lambda name: qgram_fingerprint(
        name, start_stop='$#', joiner=' '
    ),
    'phonetic_fingerprint': phonetic_fingerprint,
    # abydos.fingerprint.speedcop
    'skeleton_key': skeleton_key,
    'omission_key': omission_key,
    # abydos.fingerprint.lightweight
    'occurrence_fingerprint': occurrence_fingerprint,
    'occurrence_halved_fingerprint': occurrence_halved_fingerprint,
    'count_fingerprint': count_fingerprint,
    'position_fingerprint': position_fingerprint,
    # abydos.fingerprint.synoname
    'synoname_toolcode': synoname_toolcode,
    # Two-name form, called with the same string for both names.
    'synoname_toolcode_2name': lambda name: synoname_toolcode(name, name),
}
||
| 54 | |||
| 55 | |||
class BigListOfNaughtyStringsTestCases(unittest.TestCase):
    """Test each fingerprint algorithm against the BLNS set.

    Here, we test each algorithm against each string, but we only care that it
    does not result in an exception.

    While not actually a fuzz test, this does serve the purpose of looking for
    errors resulting from unanticipated input.
    """

    def test_blns(self):
        """Test each fingerprint algorithm against the BLNS set.

        Every non-empty line of the ``blns.txt`` corpus file that does not
        start with ``#`` is passed to every callable in ``algorithms``. The
        test fails, naming the algorithm and the input, on the first
        exception raised.
        """
        blns = []
        with codecs.open(_corpus_file('blns.txt'), encoding='UTF-8') as nsf:
            for line in nsf:
                # Remove only the line terminator. Blindly dropping the last
                # character (line[:-1]) would eat a real character from an
                # unterminated final line and leave a stray '\r' behind on
                # CRLF files.
                line = line.rstrip('\r\n')
                if line and not line.startswith('#'):
                    blns.append(line)

        for algo, func in algorithms.items():
            for ns in blns:
                try:
                    func(ns)
                # Deliberately broad: any exception at all is a test failure.
                except Exception as inst:
                    self.fail('Exception "{}" thrown by {} for BLNS: {}'
                              .format(inst, algo, ns))
||
| 82 | |||
| 83 | |||
class FuzzedWordsTestCases(unittest.TestCase):
    """Test each fingerprint algorithm against the base words set.

    The base words are loaded once, when the class body is executed, from the
    ``basewords.txt`` corpus file. Each ``fuzz_test_*`` method generates
    ``reps`` input strings and checks only that the fingerprint algorithms do
    not raise on them.

    NOTE(review): the methods are named ``fuzz_test_*`` rather than
    ``test_*``, so the default unittest method prefix will not match them;
    presumably they are run through a custom test pattern -- confirm.
    """

    # Number of generated inputs per fuzz test; multiplied by 10000 when
    # EXTREME_TEST is set.
    reps = 1000 * (10000 if EXTREME_TEST else 1)

    basewords = []
    with codecs.open(_corpus_file('basewords.txt'),
                     encoding='UTF-8') as basewords_file:
        for line in basewords_file:
            # Remove only the line terminator. Blindly dropping the last
            # character (line[:-1]) would truncate an unterminated final
            # word and leave a stray '\r' behind on CRLF files.
            line = line.rstrip('\r\n')
            if line:
                basewords.append(line)

    def _try_algorithm(self, algo, word):
        """Call the algorithm registered as ``algo`` on ``word``.

        Fails the test, naming the algorithm and the input, if the call
        raises any exception.
        """
        try:
            algorithms[algo](word)
        # Deliberately broad: any exception at all is a test failure.
        except Exception as inst:
            self.fail('Exception "{}" thrown by {} for word: {}'
                      .format(inst, algo, word))

    def _run_fuzz(self, make_word):
        """Run ``reps`` rounds of: build a word, then try algorithms on it.

        ``make_word`` is a zero-argument callable returning the input string
        for one round. With EXTREME_TEST set, every algorithm is tried each
        round; otherwise a random sample of 5 is tried.
        """
        for _ in range(self.reps):
            # Build the word before sampling so the order of calls into the
            # random module is the same as it was when this logic was inlined
            # in each test.
            fuzzed = make_word()

            if EXTREME_TEST:
                algs = list(algorithms.keys())
            else:
                algs = sample(list(algorithms.keys()), k=5)  # noqa: S311

            for algo in algs:
                self._try_algorithm(algo, fuzzed)

    @staticmethod
    def _random_word(*char_args):
        """Return a string of 8 to 16 characters drawn from _random_char.

        ``char_args`` are forwarded unchanged to each ``_random_char`` call.
        """
        return ''.join(_random_char(*char_args) for _ in
                       range(0, randint(8, 16)))  # noqa: S311

    def fuzz_test_base(self):
        """Test each fingerprint algorithm against the unfuzzed base words."""
        for algo in algorithms:
            for word in self.basewords:
                self._try_algorithm(algo, word)

    def fuzz_test_20pct(self):
        """Fuzz test fingerprint algorithms against 20% fuzzed words."""
        self._run_fuzz(
            lambda: _fuzz(choice(self.basewords),  # noqa: S311
                          fuzziness=0.2))

    def fuzz_test_100pct(self):
        """Fuzz test fingerprint algorithms against 100% fuzzed words."""
        self._run_fuzz(
            lambda: _fuzz(choice(self.basewords),  # noqa: S311
                          fuzziness=1))

    def fuzz_test_fuzz_bmp(self):
        """Fuzz test fingerprint algorithms against BMP fuzz."""
        self._run_fuzz(lambda: self._random_word(0xffff))

    def fuzz_test_fuzz_bmpsmp_letter(self):
        """Fuzz test fingerprint algorithms against alphabetic BMP+SMP fuzz."""
        self._run_fuzz(lambda: self._random_word(0x1ffff, ' LETTER '))

    def fuzz_test_fuzz_bmpsmp_latin(self):
        """Fuzz test fingerprint algorithms against Latin BMP+SMP fuzz."""
        self._run_fuzz(lambda: self._random_word(0x1ffff, 'LATIN '))

    def fuzz_test_fuzz_unicode(self):
        """Fuzz test fingerprint algorithms against valid Unicode fuzz."""
        self._run_fuzz(lambda: self._random_word())
||
| 212 | |||
| 213 | |||
# Run this module's test cases via unittest when executed as a script.
if __name__ == '__main__':
    unittest.main()
||
| 216 |