tests.fuzz.fuzz_test_fingerprint - Code Metrics - Inspection of "Merge pull request #127 from chrislit/fix_builds" - chrislit/abydos - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Branch — master (87ccc1)

by Chris

created 2018-10-23 03:59 UTC

tests.fuzz.fuzz_test_fingerprint A

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	216
Duplicated Lines	46.3 %

Importance

Changes

Metric	Value
eloc	138
dl	100
loc	216
rs	9.0399
c	0
b	0
f	0
wmc	42

How to fix Duplicated Code Complexity

# -*- coding: utf-8 -*-

# Copyright 2018 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.tests.fuzz.test_fingerprint.

This module contains fuzz tests for abydos.fingerprint.
"""

import codecs
import unittest
from random import choice, randint, sample

from abydos.fingerprint.basic import phonetic_fingerprint, qgram_fingerprint, \
    str_fingerprint
from abydos.fingerprint.lightweight import count_fingerprint, \
    occurrence_fingerprint, occurrence_halved_fingerprint, position_fingerprint
from abydos.fingerprint.speedcop import omission_key, skeleton_key
from abydos.fingerprint.synoname import synoname_toolcode

from . import EXTREME_TEST, _corpus_file, _fuzz, _random_char

# Registry of the fingerprint callables exercised by the tests below. Each key
# is the label reported in failure messages; each value is called with a
# single string argument.
algorithms = {
    # abydos.fingerprint.basic
    'str_fingerprint': str_fingerprint,
    'qgram_fingerprint': qgram_fingerprint,
    # qgram_fingerprint with a non-default qval
    'qgram_fingerprint_3': lambda name: qgram_fingerprint(name, qval=3),
    # qgram_fingerprint with explicit start_stop and joiner arguments
    'qgram_fingerprint_ssj': (
        lambda name: qgram_fingerprint(name, start_stop='$#', joiner=' ')
    ),
    'phonetic_fingerprint': phonetic_fingerprint,
    # abydos.fingerprint.speedcop
    'skeleton_key': skeleton_key,
    'omission_key': omission_key,
    # abydos.fingerprint.lightweight
    'occurrence_fingerprint': occurrence_fingerprint,
    'occurrence_halved_fingerprint': occurrence_halved_fingerprint,
    'count_fingerprint': count_fingerprint,
    'position_fingerprint': position_fingerprint,
    # abydos.fingerprint.synoname
    'synoname_toolcode': synoname_toolcode,
    # synoname_toolcode with the same string passed as both arguments
    'synoname_toolcode_2name': lambda name: synoname_toolcode(name, name),
}


class BigListOfNaughtyStringsTestCases(unittest.TestCase):
    """Test each fingerprint algorithm against the BLNS set.

    Here, we test each algorithm against each string, but we only care that it
    does not result in an exception.

    While not actually a fuzz test, this does serve the purpose of looking for
    errors resulting from unanticipated input.
    """

    def test_blns(self):
        """Test each fingerprint algorithm against the BLNS set.

        Every line of the ``blns.txt`` corpus file that is non-empty and does
        not start with ``#`` is passed to every callable in ``algorithms``.
        Return values are ignored. The test fails on the first exception,
        reporting the exception, the algorithm label and the offending string.
        """
        blns = []
        with codecs.open(_corpus_file('blns.txt'), encoding='UTF-8') as nsf:
            for line in nsf:
                # Remove only the line terminator. Unconditionally dropping
                # the last character would truncate a final line that has no
                # trailing newline.
                line = ''.join(line.splitlines())
                if line and line[0] != '#':
                    blns.append(line)

        for algo, fingerprint in algorithms.items():
            for ns in blns:
                try:
                    fingerprint(ns)
                except Exception as inst:
                    # Deliberately broad: any exception at all is a failure.
                    self.fail('Exception "{}" thrown by {} for BLNS: {}'
                              .format(inst, algo, ns))


class FuzzedWordsTestCases(unittest.TestCase):
    """Test each fingerprint algorithm against the base words set.

    The base words are read from the ``basewords.txt`` corpus file once, when
    the class body is executed. Each test passes plain words, fuzzed variants
    of them, or random strings to the callables in ``algorithms`` and only
    requires that no exception is raised.

    NOTE(review): the test methods use a ``fuzz_test_`` prefix, which
    unittest's default loader (prefix ``test``) does not collect; presumably
    a runner configured elsewhere picks them up -- confirm.
    """

    # Number of random inputs generated by each randomized test; 10000 times
    # larger when EXTREME_TEST is set.
    reps = 1000 * (10000 if EXTREME_TEST else 1)

    basewords = []
    with codecs.open(_corpus_file('basewords.txt'),
                     encoding='UTF-8') as basewords_file:
        for line in basewords_file:
            # Remove only the line terminator. Unconditionally dropping the
            # last character would truncate a final line that has no trailing
            # newline.
            line = ''.join(line.splitlines())
            if line:
                basewords.append(line)

    def _check(self, algo, word):
        """Run one algorithm on one word, failing on any exception.

        :param algo: key into ``algorithms``, also used in the failure message
        :param word: the string passed to the algorithm
        """
        try:
            algorithms[algo](word)
        except Exception as inst:
            # Deliberately broad: any exception at all is a failure.
            self.fail('Exception "{}" thrown by {} for word: {}'
                      .format(inst, algo, word))

    def _check_sampled(self, word):
        """Run a selection of the algorithms on one word.

        With EXTREME_TEST set, every algorithm is run. Otherwise a random
        sample of five is run (fewer only if fewer are registered).

        :param word: the string passed to each selected algorithm
        """
        names = list(algorithms.keys())
        if not EXTREME_TEST:
            # Cap the sample size so a smaller registry cannot raise.
            names = sample(names, k=min(5, len(names)))  # noqa: S311

        for algo in names:
            self._check(algo, word)

    def _check_fuzzed_basewords(self, fuzziness):
        """Check ``reps`` randomly chosen base words fuzzed by ``_fuzz``.

        :param fuzziness: forwarded unchanged as ``_fuzz``'s ``fuzziness``
        """
        for _ in range(self.reps):
            fuzzed = _fuzz(choice(self.basewords),  # noqa: S311
                           fuzziness=fuzziness)
            self._check_sampled(fuzzed)

    def _check_random_strings(self, *char_args):
        """Check ``reps`` random strings of 8 to 16 characters.

        :param char_args: positional arguments forwarded unchanged to every
            ``_random_char`` call
        """
        for _ in range(self.reps):
            fuzzed = ''.join(_random_char(*char_args) for _ in
                             range(0, randint(8, 16)))  # noqa: S311
            self._check_sampled(fuzzed)

    def fuzz_test_base(self):
        """Test each fingerprint algorithm against the unfuzzed base words."""
        for algo in algorithms:
            for word in self.basewords:
                self._check(algo, word)

    def fuzz_test_20pct(self):
        """Fuzz test fingerprint algorithms against 20% fuzzed words."""
        self._check_fuzzed_basewords(0.2)

    def fuzz_test_100pct(self):
        """Fuzz test fingerprint algorithms against 100% fuzzed words."""
        self._check_fuzzed_basewords(1)

    def fuzz_test_fuzz_bmp(self):
        """Fuzz test fingerprint algorithms against BMP fuzz."""
        self._check_random_strings(0xffff)

    def fuzz_test_fuzz_bmpsmp_letter(self):
        """Fuzz test fingerprint algorithms against alphabetic BMP+SMP fuzz."""
        self._check_random_strings(0x1ffff, ' LETTER ')

    def fuzz_test_fuzz_bmpsmp_latin(self):
        """Fuzz test fingerprint algorithms against Latin BMP+SMP fuzz."""
        self._check_random_strings(0x1ffff, 'LATIN ')

    def fuzz_test_fuzz_unicode(self):
        """Fuzz test fingerprint algorithms against valid Unicode fuzz."""
        self._check_random_strings()


if __name__ == '__main__':
    # Run this module's tests through unittest's command-line entry point
    # when the file is executed directly.
    unittest.main()


1			# -*- coding: utf-8 -*-
2
3			# Copyright 2018 by Christopher C. Little.
4			# This file is part of Abydos.
5			#
6			# Abydos is free software: you can redistribute it and/or modify
7			# it under the terms of the GNU General Public License as published by
8			# the Free Software Foundation, either version 3 of the License, or
9			# (at your option) any later version.
10			#
11			# Abydos is distributed in the hope that it will be useful,
12			# but WITHOUT ANY WARRANTY; without even the implied warranty of
13			# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14			# GNU General Public License for more details.
15			#
16			# You should have received a copy of the GNU General Public License
17			# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19			"""abydos.tests.fuzz.test_fingerprint.
20
21			This module contains fuzz tests for abydos.fingerprint
22			"""
23
24			import codecs
25			import unittest
26			from random import choice, randint, sample
27
28			from abydos.fingerprint.basic import phonetic_fingerprint, qgram_fingerprint, \
29			str_fingerprint
30			from abydos.fingerprint.lightweight import count_fingerprint, \
31			occurrence_fingerprint, occurrence_halved_fingerprint, position_fingerprint
32			from abydos.fingerprint.speedcop import omission_key, skeleton_key
33			from abydos.fingerprint.synoname import synoname_toolcode
34
35			from . import EXTREME_TEST, _corpus_file, _fuzz, _random_char
36
37			algorithms = {'str_fingerprint': str_fingerprint,
38			'qgram_fingerprint': qgram_fingerprint,
39			'qgram_fingerprint_3':
40			lambda name: qgram_fingerprint(name, qval=3),
41			'qgram_fingerprint_ssj':
42			lambda name:
43			qgram_fingerprint(name, start_stop='$#', joiner=' '),
44			'phonetic_fingerprint': phonetic_fingerprint,
45			'skeleton_key': skeleton_key,
46			'omission_key': omission_key,
47			'occurrence_fingerprint': occurrence_fingerprint,
48			'occurrence_halved_fingerprint': occurrence_halved_fingerprint,
49			'count_fingerprint': count_fingerprint,
50			'position_fingerprint': position_fingerprint,
51			'synoname_toolcode': synoname_toolcode,
52			'synoname_toolcode_2name':
53			lambda name: synoname_toolcode(name, name)}
54
55
56			class BigListOfNaughtyStringsTestCases(unittest.TestCase):
57			"""Test each fingerprint algorithm against the BLNS set.
58
59			Here, we test each algorithm against each string, but we only care that it
60			does not result in an exception.
61
62			While not actually a fuzz test, this does serve the purpose of looking for
63			errors resulting from unanticipated input.
64			"""
65
66			def test_blns(self):
67			"""Test each fingerprint algorithm against the BLNS set."""
68			blns = []
69			with codecs.open(_corpus_file('blns.txt'), encoding='UTF-8') as nsf:
70			for line in nsf:
71			line = line[:-1]
72			if line and line[0] != '#':
73			blns.append(line)
74
75			for algo in algorithms:
76			for ns in blns:
77			try:
78			algorithms[algo](ns)
79			except Exception as inst:
80			self.fail('Exception "{}" thrown by {} for BLNS: {}'
81			.format(inst, algo, ns))
82
83
84			class FuzzedWordsTestCases(unittest.TestCase):
85			"""Test each fingerprint algorithm against the base words set."""
86
87			reps = 1000 * (10000 if EXTREME_TEST else 1)
88
89			basewords = []
90			with codecs.open(_corpus_file('basewords.txt'),
91			encoding='UTF-8') as basewords_file:
92			for line in basewords_file:
93			line = line[:-1]
94			if line:
95			basewords.append(line)
96
97			def fuzz_test_base(self):
98			"""Test each fingerprint algorithm against the unfuzzed base words."""
99			for algo in algorithms:
100			for word in self.basewords:
101			try:
102			algorithms[algo](word)
103			except Exception as inst:
104			self.fail('Exception "{}" thrown by {} for word: {}'
105			.format(inst, algo, word))
106
107			def fuzz_test_20pct(self):
108			"""Fuzz test fingerprint algorithms against 20% fuzzed words."""
109			for _ in range(self.reps):
110			fuzzed = _fuzz(choice(self.basewords), fuzziness=0.2) # noqa: S311
111
112			if EXTREME_TEST:
113			algs = list(algorithms.keys())
114			else:
115			algs = sample(list(algorithms.keys()), k=5) # noqa: S311
116
117			for algo in algs:
118			try:
119			algorithms[algo](fuzzed)
120			except Exception as inst:
121			self.fail('Exception "{}" thrown by {} for word: {}'
122			.format(inst, algo, fuzzed))
123
124			def fuzz_test_100pct(self):
125			"""Fuzz test fingerprint algorithms against 100% fuzzed words."""
126			for _ in range(self.reps):
127			fuzzed = _fuzz(choice(self.basewords), fuzziness=1) # noqa: S311
128
129			if EXTREME_TEST:
130			algs = list(algorithms.keys())
131			else:
132			algs = sample(list(algorithms.keys()), k=5) # noqa: S311
133
134			for algo in algs:
135			try:
136			algorithms[algo](fuzzed)
137			except Exception as inst:
138			self.fail('Exception "{}" thrown by {} for word: {}'
139			.format(inst, algo, fuzzed))
140
141			def fuzz_test_fuzz_bmp(self):
142			"""Fuzz test fingerprint algorithms against BMP fuzz."""
143			for _ in range(self.reps):
144			fuzzed = ''.join(_random_char(0xffff) for _ in
145			range(0, randint(8, 16))) # noqa: S311
146
147			if EXTREME_TEST:
148			algs = list(algorithms.keys())
149			else:
150			algs = sample(list(algorithms.keys()), k=5) # noqa: S311
151
152			for algo in algs:
153			try:
154			algorithms[algo](fuzzed)
155			except Exception as inst:
156			self.fail('Exception "{}" thrown by {} for word: {}'
157			.format(inst, algo, fuzzed))
158
159			def fuzz_test_fuzz_bmpsmp_letter(self):
160			"""Fuzz test fingerprint algorithms against alphabetic BMP+SMP fuzz."""
161			for _ in range(self.reps):
162			fuzzed = ''.join(_random_char(0x1ffff, ' LETTER ') for _ in
163			range(0, randint(8, 16))) # noqa: S311
164
165			if EXTREME_TEST:
166			algs = list(algorithms.keys())
167			else:
168			algs = sample(list(algorithms.keys()), k=5) # noqa: S311
169
170			for algo in algs:
171			try:
172			algorithms[algo](fuzzed)
173			except Exception as inst:
174			self.fail('Exception "{}" thrown by {} for word: {}'
175			.format(inst, algo, fuzzed))
176
177			def fuzz_test_fuzz_bmpsmp_latin(self):
178			"""Fuzz test fingerprint algorithms against Latin BMP+SMP fuzz."""
179			for _ in range(self.reps):
180			fuzzed = ''.join(_random_char(0x1ffff, 'LATIN ') for _ in
181			range(0, randint(8, 16))) # noqa: S311
182
183			if EXTREME_TEST:
184			algs = list(algorithms.keys())
185			else:
186			algs = sample(list(algorithms.keys()), k=5) # noqa: S311
187
188			for algo in algs:
189			try:
190			algorithms[algo](fuzzed)
191			except Exception as inst:
192			self.fail('Exception "{}" thrown by {} for word: {}'
193			.format(inst, algo, fuzzed))
194
195			def fuzz_test_fuzz_unicode(self):
196			"""Fuzz test fingerprint algorithms against valid Unicode fuzz."""
197			for _ in range(self.reps):
198			fuzzed = ''.join(_random_char() for _ in
199			range(0, randint(8, 16))) # noqa: S311
200
201			if EXTREME_TEST:
202			algs = list(algorithms.keys())
203			else:
204			algs = sample(list(algorithms.keys()), k=5) # noqa: S311
205
206			for algo in algs:
207			try:
208			algorithms[algo](fuzzed)
209			except Exception as inst:
210			self.fail('Exception "{}" thrown by {} for word: {}'
211			.format(inst, algo, fuzzed))
212
213
214			if __name__ == '__main__':
215			unittest.main()
216

chrislit / abydos

Branch — master (87ccc1)

tests.fuzz.fuzz_test_fingerprint A

Complexity

Size/Duplication

Importance

How to fix Duplicated Code Complexity

Duplicated Code

Complexity

Duplication Side-by-Side

Filter issues like