tests.fuzz.fuzz_test_fingerprint - Code Metrics - Inspection of "Merge pull request #100 from chrislit/fuzz" - chrislit/abydos - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( 6d3f3a...971900 )

by Chris

created 2018-09-30 05:01 UTC

tests.fuzz.fuzz_test_fingerprint A

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	226
Duplicated Lines	44.25 %

Importance

Changes

Metric	Value
eloc	143
dl	100
loc	226
rs	9.0399
c	0
b	0
f	0
wmc	42

8 Methods

Rating	Name	Duplication	Size	Complexity
A	FuzzedWordsTestCases.fuzz_test_base()	0	9	4
A	FuzzedWordsTestCases.fuzz_test_fuzz_bmpsmp_latin()	17	17	5
A	FuzzedWordsTestCases.fuzz_test_100pct()	16	16	5
A	FuzzedWordsTestCases.fuzz_test_20pct()	16	16	5
B	BigListOfNaughtyStringsTestCases.test_blns()	0	16	8
A	FuzzedWordsTestCases.fuzz_test_fuzz_bmpsmp_letter()	17	17	5
A	FuzzedWordsTestCases.fuzz_test_fuzz_bmp()	17	17	5
A	FuzzedWordsTestCases.fuzz_test_fuzz_unicode()	17	17	5

How to fix Duplicated Code Complexity

# -*- coding: utf-8 -*-

# Copyright 2018 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.tests.fuzz.test_fingerprint.

This module contains fuzz tests for abydos.fingerprint
"""

import codecs
import os
import random
import unittest

from abydos.fingerprint import count_fingerprint, occurrence_fingerprint, \
    occurrence_halved_fingerprint, omission_key, phonetic_fingerprint, \
    position_fingerprint, qgram_fingerprint, skeleton_key, str_fingerprint, \
    synoname_toolcode

from . import fuzz, random_char

def _qgram_fingerprint_3(name):
    """Call qgram_fingerprint on name with qval=3."""
    return qgram_fingerprint(name, qval=3)


def _qgram_fingerprint_ssj(name):
    """Call qgram_fingerprint on name with start_stop='$#' and joiner=' '."""
    return qgram_fingerprint(name, start_stop='$#', joiner=' ')


def _synoname_toolcode_2name(name):
    """Call synoname_toolcode with name passed as both arguments."""
    return synoname_toolcode(name, name)


# Registry of the callables under test: each key is the label used in failure
# messages, each value accepts a single string argument.
algorithms = {
    'str_fingerprint': str_fingerprint,
    'qgram_fingerprint': qgram_fingerprint,
    'qgram_fingerprint_3': _qgram_fingerprint_3,
    'qgram_fingerprint_ssj': _qgram_fingerprint_ssj,
    'phonetic_fingerprint': phonetic_fingerprint,
    'skeleton_key': skeleton_key,
    'omission_key': omission_key,
    'occurrence_fingerprint': occurrence_fingerprint,
    'occurrence_halved_fingerprint': occurrence_halved_fingerprint,
    'count_fingerprint': count_fingerprint,
    'position_fingerprint': position_fingerprint,
    'synoname_toolcode': synoname_toolcode,
    'synoname_toolcode_2name': _synoname_toolcode_2name,
}

# Directory containing this test module; corpora paths are built from it.
TESTDIR = os.path.dirname(__file__)

# Set to True to test EVERY single case (NB: takes hours)
EXTREME_TEST = False

# A marker file named EXTREME_TEST, located either beside this module or one
# directory above it, also switches EXTREME_TEST mode on.
if not EXTREME_TEST:
    EXTREME_TEST = any(os.path.isfile(TESTDIR + marker)
                       for marker in ('/EXTREME_TEST', '/../EXTREME_TEST'))


class BigListOfNaughtyStringsTestCases(unittest.TestCase):
    """Test each fingerprint algorithm against the BLNS set.

    Here, we test each algorithm against each string, but we only care that it
    does not result in an exception.

    While not actually a fuzz test, this does serve the purpose of looking for
    errors resulting from unanticipated input.
    """

    def test_blns(self):
        """Test each fingerprint algorithm against the BLNS set.

        Reads corpora/blns.txt (relative to TESTDIR), skips blank lines and
        lines starting with '#', and calls every entry of ``algorithms`` on
        each remaining string. Any exception is reported as a test failure
        naming the algorithm and the offending string.
        """
        blns = []
        with codecs.open(TESTDIR + '/corpora/blns.txt',
                         encoding='UTF-8') as nsf:
            for line in nsf:
                # Strip only the line terminator (LF or CRLF). Blindly
                # slicing off the last character would corrupt a final line
                # that has no trailing newline and would leave a stray '\r'
                # on CRLF-terminated lines.
                line = line.rstrip('\r\n')
                if line and not line.startswith('#'):
                    blns.append(line)

        for algo in algorithms:
            for ns in blns:
                try:
                    algorithms[algo](ns)
                except Exception as inst:
                    # Deliberately broad: any exception at all is a failure.
                    self.fail('Exception "{}" thrown by {} for BLNS: {}'
                              .format(inst, algo, ns))


class FuzzedWordsTestCases(unittest.TestCase):
    """Test each fingerprint algorithm against the base words set.

    Every test only checks that the algorithms do not raise; return values
    are ignored. Inputs are either the unmodified base words, fuzzed base
    words, or strings of random characters.

    NOTE(review): the methods are named ``fuzz_test_*`` rather than
    ``test_*`` -- presumably so they are only run by a dedicated fuzz runner;
    confirm before renaming.
    """

    # Number of random inputs generated per fuzz test (100x in EXTREME_TEST).
    reps = 100000 * (100 if EXTREME_TEST else 1)

    # Base words, loaded once when the class body is executed.
    basewords = []
    with codecs.open(TESTDIR + '/corpora/basewords.txt',
                     encoding='UTF-8') as basewords_file:
        for line in basewords_file:
            # Strip only the line terminator (LF or CRLF); slicing off the
            # last character would truncate a final word that is not
            # followed by a newline.
            line = line.rstrip('\r\n')
            if line:
                basewords.append(line)

    def _check(self, algo, word):
        """Call the algorithm registered as algo on word; fail on exception."""
        try:
            algorithms[algo](word)
        except Exception as inst:
            # Deliberately broad: any exception at all is a failure.
            self.fail('Exception "{}" thrown by {} for word: {}'
                      .format(inst, algo, word))

    def _fuzz_test(self, make_input):
        """Run self.reps rounds of fuzzing using inputs from make_input.

        :param make_input: zero-argument callable returning the next string
            to feed to the algorithms

        In EXTREME_TEST mode every algorithm is applied to every input;
        otherwise up to five distinct, randomly selected algorithms are.
        """
        # Loop-invariant, so built once rather than once per repetition.
        names = list(algorithms)
        sample_size = min(5, len(names))

        for _ in range(self.reps):
            fuzzed = make_input()

            if EXTREME_TEST:
                algs = names
            else:
                # Sample without replacement so that no repetition is wasted
                # re-running the same algorithm on the same input.
                algs = random.sample(names, sample_size)

            for algo in algs:
                self._check(algo, fuzzed)

    @staticmethod
    def _random_word(*char_args):
        """Join 8 to 16 characters, each drawn via random_char(*char_args)."""
        return ''.join(random_char(*char_args) for _ in
                       range(random.randint(8, 16)))

    def fuzz_test_base(self):
        """Test each fingerprint algorithm against the unfuzzed base words."""
        for algo in algorithms:
            for word in self.basewords:
                self._check(algo, word)

    def fuzz_test_20pct(self):
        """Fuzz test fingerprint algorithms against 20% fuzzed words."""
        self._fuzz_test(
            lambda: fuzz(random.choice(self.basewords), fuzziness=0.2))

    def fuzz_test_100pct(self):
        """Fuzz test fingerprint algorithms against 100% fuzzed words."""
        self._fuzz_test(
            lambda: fuzz(random.choice(self.basewords), fuzziness=1))

    def fuzz_test_fuzz_bmp(self):
        """Fuzz test fingerprint algorithms against BMP fuzz."""
        self._fuzz_test(lambda: self._random_word(0xffff))

    def fuzz_test_fuzz_bmpsmp_letter(self):
        """Fuzz test fingerprint algorithms against alphabetic BMP+SMP fuzz."""
        self._fuzz_test(lambda: self._random_word(0x1ffff, ' LETTER '))

    def fuzz_test_fuzz_bmpsmp_latin(self):
        """Fuzz test fingerprint algorithms against Latin BMP+SMP fuzz."""
        self._fuzz_test(lambda: self._random_word(0x1ffff, 'LATIN '))

    def fuzz_test_fuzz_unicode(self):
        """Fuzz test fingerprint algorithms against valid Unicode fuzz."""
        self._fuzz_test(lambda: self._random_word())


# Start the unittest command-line runner when this file is executed directly.
if __name__ == '__main__':
    unittest.main()


1		# -*- coding: utf-8 -*-
2
3		# Copyright 2018 by Christopher C. Little.
4		# This file is part of Abydos.
5		#
6		# Abydos is free software: you can redistribute it and/or modify
7		# it under the terms of the GNU General Public License as published by
8		# the Free Software Foundation, either version 3 of the License, or
9		# (at your option) any later version.
10		#
11		# Abydos is distributed in the hope that it will be useful,
12		# but WITHOUT ANY WARRANTY; without even the implied warranty of
13		# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14		# GNU General Public License for more details.
15		#
16		# You should have received a copy of the GNU General Public License
17		# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19		"""abydos.tests.fuzz.test_fingerprint.
20
21		This module contains fuzz tests for abydos.fingerprint
22		"""
23
24		import codecs
25		import os
26		import random
27		import unittest
28
29		from abydos.fingerprint import count_fingerprint, occurrence_fingerprint, \
30		occurrence_halved_fingerprint, omission_key, phonetic_fingerprint, \
31		position_fingerprint, qgram_fingerprint, skeleton_key, str_fingerprint, \
32		synoname_toolcode
33
34		from . import fuzz, random_char
35
36		algorithms = {'str_fingerprint': str_fingerprint,
37		'qgram_fingerprint': qgram_fingerprint,
38		'qgram_fingerprint_3':
39		lambda name: qgram_fingerprint(name, qval=3),
40		'qgram_fingerprint_ssj':
41		lambda name:
42		qgram_fingerprint(name, start_stop='$#', joiner=' '),
43		'phonetic_fingerprint': phonetic_fingerprint,
44		'skeleton_key': skeleton_key,
45		'omission_key': omission_key,
46		'occurrence_fingerprint': occurrence_fingerprint,
47		'occurrence_halved_fingerprint': occurrence_halved_fingerprint,
48		'count_fingerprint': count_fingerprint,
49		'position_fingerprint': position_fingerprint,
50		'synoname_toolcode': synoname_toolcode,
51		'synoname_toolcode_2name':
52		lambda name: synoname_toolcode(name, name)}
53
54		TESTDIR = os.path.dirname(__file__)
55
56		EXTREME_TEST = False # Set to True to test EVERY single case (NB: takes hours)
57
58		if not EXTREME_TEST and os.path.isfile(TESTDIR + '/EXTREME_TEST'):
59		# EXTREME_TEST file detected -- switching to EXTREME_TEST mode...
60		EXTREME_TEST = True
61		if not EXTREME_TEST and os.path.isfile(TESTDIR + '/../EXTREME_TEST'):
62		# EXTREME_TEST file detected -- switching to EXTREME_TEST mode...
63		EXTREME_TEST = True
64
65
66		class BigListOfNaughtyStringsTestCases(unittest.TestCase):
67		"""Test each fingerprint algorithm against the BLNS set.
68
69		Here, we test each algorithm against each string, but we only care that it
70		does not result in an exception.
71
72		While not actually a fuzz test, this does serve the purpose of looking for
73		errors resulting from unanticipated input.
74		"""
75
76		def test_blns(self):
77		"""Test each fingerprint algorithm against the BLNS set."""
78		blns = []
79		with codecs.open(TESTDIR+'/corpora/blns.txt', encoding='UTF-8') as nsf:
80		for line in nsf:
81		line = line[:-1]
82		if line and line[0] != '#':
83		blns.append(line)
84
85		for algo in algorithms:
86		for ns in blns:
87		try:
88		algorithms[algo](ns)
89		except Exception as inst:
90		self.fail('Exception "{}" thrown by {} for BLNS: {}'
91		.format(inst, algo, ns))
92
93
94		class FuzzedWordsTestCases(unittest.TestCase):
95		"""Test each fingerprint algorithm against the base words set."""
96
97		reps = 100000 * (100 if EXTREME_TEST else 1)
98
99		basewords = []
100		with codecs.open(TESTDIR + '/corpora/basewords.txt',
101		encoding='UTF-8') as basewords_file:
102		for line in basewords_file:
103		line = line[:-1]
104		if line:
105		basewords.append(line)
106
107		def fuzz_test_base(self):
108		"""Test each fingerprint algorithm against the unfuzzed base words."""
109		for algo in algorithms:
110		for word in self.basewords:
111		try:
112		algorithms[algo](word)
113		except Exception as inst:
114		self.fail('Exception "{}" thrown by {} for word: {}'
115		.format(inst, algo, word))
116
117	View Code Duplication	def fuzz_test_20pct(self):
		0 ignored issues – show Duplication introduced 2018-09-30 05:07 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
118		"""Fuzz test fingerprint algorithms against 20% fuzzed words."""
119		for _ in range(self.reps):
120		fuzzed = fuzz(random.choice(self.basewords), fuzziness=0.2)
121
122		if EXTREME_TEST:
123		algs = list(algorithms.keys())
124		else:
125		algs = random.choices(list(algorithms.keys()), k=5)
126
127		for algo in algs:
128		try:
129		algorithms[algo](fuzzed)
130		except Exception as inst:
131		self.fail('Exception "{}" thrown by {} for word: {}'
132		.format(inst, algo, fuzzed))
133
134	View Code Duplication	def fuzz_test_100pct(self):
		0 ignored issues – show Duplication introduced 2018-09-30 05:07 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
135		"""Fuzz test fingerprint algorithms against 100% fuzzed words."""
136		for _ in range(self.reps):
137		fuzzed = fuzz(random.choice(self.basewords), fuzziness=1)
138
139		if EXTREME_TEST:
140		algs = list(algorithms.keys())
141		else:
142		algs = random.choices(list(algorithms.keys()), k=5)
143
144		for algo in algs:
145		try:
146		algorithms[algo](fuzzed)
147		except Exception as inst:
148		self.fail('Exception "{}" thrown by {} for word: {}'
149		.format(inst, algo, fuzzed))
150
151	View Code Duplication	def fuzz_test_fuzz_bmp(self):
		0 ignored issues – show Duplication introduced 2018-09-30 05:07 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
152		"""Fuzz test fingerprint algorithms against BMP fuzz."""
153		for _ in range(self.reps):
154		fuzzed = ''.join(random_char(0xffff) for _ in
155		range(0, random.randint(8, 16)))
156
157		if EXTREME_TEST:
158		algs = list(algorithms.keys())
159		else:
160		algs = random.choices(list(algorithms.keys()), k=5)
161
162		for algo in algs:
163		try:
164		algorithms[algo](fuzzed)
165		except Exception as inst:
166		self.fail('Exception "{}" thrown by {} for word: {}'
167		.format(inst, algo, fuzzed))
168
169	View Code Duplication	def fuzz_test_fuzz_bmpsmp_letter(self):
		0 ignored issues – show Duplication introduced 2018-09-30 05:07 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
170		"""Fuzz test fingerprint algorithms against alphabetic BMP+SMP fuzz."""
171		for _ in range(self.reps):
172		fuzzed = ''.join(random_char(0x1ffff, ' LETTER ') for _ in
173		range(0, random.randint(8, 16)))
174
175		if EXTREME_TEST:
176		algs = list(algorithms.keys())
177		else:
178		algs = random.choices(list(algorithms.keys()), k=5)
179
180		for algo in algs:
181		try:
182		algorithms[algo](fuzzed)
183		except Exception as inst:
184		self.fail('Exception "{}" thrown by {} for word: {}'
185		.format(inst, algo, fuzzed))
186
187	View Code Duplication	def fuzz_test_fuzz_bmpsmp_latin(self):
		0 ignored issues – show Duplication introduced 2018-09-30 05:07 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
188		"""Fuzz test fingerprint algorithms against Latin BMP+SMP fuzz."""
189		for _ in range(self.reps):
190		fuzzed = ''.join(random_char(0x1ffff, 'LATIN ') for _ in
191		range(0, random.randint(8, 16)))
192
193		if EXTREME_TEST:
194		algs = list(algorithms.keys())
195		else:
196		algs = random.choices(list(algorithms.keys()), k=5)
197
198		for algo in algs:
199		try:
200		algorithms[algo](fuzzed)
201		except Exception as inst:
202		self.fail('Exception "{}" thrown by {} for word: {}'
203		.format(inst, algo, fuzzed))
204
205	View Code Duplication	def fuzz_test_fuzz_unicode(self):
		0 ignored issues – show Duplication introduced 2018-09-30 05:07 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
206		"""Fuzz test fingerprint algorithms against valid Unicode fuzz."""
207		for _ in range(self.reps):
208		fuzzed = ''.join(random_char() for _ in
209		range(0, random.randint(8, 16)))
210
211		if EXTREME_TEST:
212		algs = list(algorithms.keys())
213		else:
214		algs = random.choices(list(algorithms.keys()), k=5)
215
216		for algo in algs:
217		try:
218		algorithms[algo](fuzzed)
219		except Exception as inst:
220		self.fail('Exception "{}" thrown by {} for word: {}'
221		.format(inst, algo, fuzzed))
222
223
224		if __name__ == '__main__':
225		unittest.main()
226

chrislit / abydos

Push — master ( 6d3f3a...971900 )

tests.fuzz.fuzz_test_fingerprint A

Complexity

Size/Duplication

Importance

8 Methods

How to fix Duplicated Code Complexity

Duplicated Code

Complexity

Duplication Side-by-Side

Filter issues like