abydos.tokenizer._qgrams.QGrams.__init__() - Code Metrics - Inspection of "started new entry in HISTORY for 0.4.0" - chrislit/abydos - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( 3ac297...afe14d )

by Chris

created 2019-06-01 01:11 UTC

abydos.tokenizer._qgrams.QGrams.__init__() C

↳ Parent: abydos.tokenizer._qgrams

Complexity

Conditions

Size

Total Lines	74
Code Lines	25

Duplication

Lines	0
Ratio	0 %

Code Coverage

Tests	20
CRAP Score	10

Importance

Changes

Metric	Value
cc	10
eloc	25
nop	5
dl	0
loc	74
ccs	20
cts	20
cp	1
crap	10
rs	5.9999
c	0
b	0
f	0

How to fix Long Method Complexity

# -*- coding: utf-8 -*-

# Copyright 2014-2018 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.tokenizer._q_grams.

QGrams multi-set class
"""

from __future__ import (
    absolute_import,
    division,
    print_function,
    unicode_literals,
)

from collections import Counter

try:
    from collections.abc import Iterable
except ImportError:  # Python 2: the ABCs live directly in collections
    from collections import Iterable

from six.moves import range

__all__ = ['QGrams']


class QGrams(Counter):
    """A q-gram class, which functions like a bag/multiset.

    A q-gram is here defined as all sequences of q characters. Q-grams are also
    known as k-grams and n-grams, but the term n-gram more typically refers to
    sequences of whitespace-delimited words in a string, where q-gram refers
    to sequences of characters in a word or string.

    In addition to the counts held by the underlying ``Counter``, each
    instance stores three private attributes:

    - ``_term``: the original, unpadded string
    - ``_term_ss``: the longest padded form of the string that was actually
      used to generate q-grams (equal to ``_term`` if no padding was applied)
    - ``_ordered_list``: every generated q-gram, in generation order
    """

    def __init__(self, term, qval=2, start_stop='$#', skip=0):
        """Initialize QGrams.

        Parameters
        ----------
        term : str
            A string to extract q-grams from
        qval : int or Iterable
            The q-gram length (defaults to 2), can be an integer, range object,
            list, or any other iterable of integers. Values below 1 or above
            ``len(term)`` are silently ignored.
        start_stop : str
            A string of length >= 0 indicating start & stop symbols.
            If the string is '', q-grams will be calculated without start &
            stop symbols appended to each end.
            Otherwise, the first character of start_stop will pad the
            beginning of the string and the last character of start_stop
            will pad the end of the string before q-grams are calculated.
            (In the case that start_stop is only 1 character long, the same
            symbol will be used for both.)
            Padding is only applied for q-gram lengths greater than 1.
        skip : int or Iterable
            The number of characters to skip, can be an integer, range object,
            list, or any other iterable of integers. Every skip value is
            applied to every q-gram length.

        Notes
        -----
        For a skip value greater than 0, a window starting close to the end of
        the (padded) string is cut off by the end of the string, so the final
        q-grams may contain fewer than q characters.

        Examples
        --------
        >>> qg = QGrams('AATTATAT')
        >>> qg
        QGrams({'AT': 3, 'TA': 2, '$A': 1, 'AA': 1, 'TT': 1, 'T#': 1})

        >>> qg = QGrams('AATTATAT', qval=1, start_stop='')
        >>> qg
        QGrams({'A': 4, 'T': 4})

        >>> qg = QGrams('AATTATAT', qval=3, start_stop='')
        >>> qg
        QGrams({'TAT': 2, 'AAT': 1, 'ATT': 1, 'TTA': 1, 'ATA': 1})

        """
        # Save the term itself
        self._term = term
        self._term_ss = term
        self._ordered_list = []

        # Normalize both arguments to tuples. A scalar becomes a 1-tuple; an
        # iterable is copied so that a one-shot iterator or generator passed
        # as skip is not exhausted after the first q-gram length.
        qvals = tuple(qval) if isinstance(qval, Iterable) else (qval,)
        skips = tuple(skip) if isinstance(skip, Iterable) else (skip,)

        for qval_i in qvals:
            for skip_i in skips:
                # Ignore q-gram lengths that the term cannot accommodate
                if len(self._term) < qval_i or qval_i < 1:
                    continue

                if start_stop and qval_i > 1:
                    # Pad with q-1 symbols so that every character of the
                    # term occurs at every position within some q-gram
                    padded = (
                        start_stop[0] * (qval_i - 1)
                        + self._term
                        + start_stop[-1] * (qval_i - 1)
                    )
                else:
                    padded = self._term

                # Having appended start & stop symbols (or not), save the
                # result, but only for the longest valid qval_i
                if len(padded) > len(self._term_ss):
                    self._term_ss = padded

                # A skip of n means taking every (n+1)th character
                step = skip_i + 1
                self._ordered_list.extend(
                    padded[i : i + (qval_i * step) : step]
                    for i in range(len(padded) - (qval_i - 1))
                )

        super(QGrams, self).__init__(self._ordered_list)

    def count(self):
        """Return q-grams count.

        Returns
        -------
        int
            The total count of q-grams in a QGrams object, i.e. the sum of
            the multiplicities of all distinct q-grams

        Examples
        --------
        >>> qg = QGrams('AATTATAT')
        >>> qg.count()
        9

        >>> qg = QGrams('AATTATAT', qval=1, start_stop='')
        >>> qg.count()
        8

        >>> qg = QGrams('AATTATAT', qval=3, start_stop='')
        >>> qg.count()
        6

        """
        return sum(self.values())


# When executed directly as a script, run the doctests embedded in the
# docstrings of this module.
if __name__ == '__main__':
    from doctest import testmod

    testmod()


1		# -*- coding: utf-8 -*-
2
3		# Copyright 2014-2018 by Christopher C. Little.
4		# This file is part of Abydos.
5		#
6		# Abydos is free software: you can redistribute it and/or modify
7		# it under the terms of the GNU General Public License as published by
8		# the Free Software Foundation, either version 3 of the License, or
9		# (at your option) any later version.
10		#
11		# Abydos is distributed in the hope that it will be useful,
12		# but WITHOUT ANY WARRANTY; without even the implied warranty of
13		# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14		# GNU General Public License for more details.
15		#
16		# You should have received a copy of the GNU General Public License
17		# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19	1	"""abydos.tokenizer._q_grams.
20
21		QGrams multi-set class
22		"""
23
24	1	from __future__ import (
25		absolute_import,
26		division,
27		print_function,
28		unicode_literals,
29		)
30
31	1	from collections import Counter, Iterable
32
33	1	from six.moves import range
34
35	1	__all__ = ['QGrams']
36
37
38	1	class QGrams(Counter):
39		"""A q-gram class, which functions like a bag/multiset.
40
41		A q-gram is here defined as all sequences of q characters. Q-grams are also
42		known as k-grams and n-grams, but the term n-gram more typically refers to
43		sequences of whitespace-delimited words in a string, where q-gram refers
44		to sequences of characters in a word or string.
45		"""
46
47	1	def __init__(self, term, qval=2, start_stop='$#', skip=0):
48		"""Initialize QGrams.
49
50		Parameters
51		----------
52		term : str
53		A string to extract q-grams from
54		qval : int or Iterable
55		The q-gram length (defaults to 2), can be an integer, range object,
56		or list
57		start_stop : str
58		A string of length >= 0 indicating start & stop symbols.
59		If the string is '', q-grams will be calculated without start &
60		stop symbols appended to each end.
61		Otherwise, the first character of start_stop will pad the
62		beginning of the string and the last character of start_stop
63		will pad the end of the string before q-grams are calculated.
64		(In the case that start_stop is only 1 character long, the same
65		symbol will be used for both.)
66		skip : int or Iterable
67		The number of characters to skip, can be an integer, range object,
68		or list
69
70		Examples
71		--------
72		>>> qg = QGrams('AATTATAT')
73		>>> qg
74		QGrams({'AT': 3, 'TA': 2, '$A': 1, 'AA': 1, 'TT': 1, 'T#': 1})
75
76		>>> qg = QGrams('AATTATAT', qval=1, start_stop='')
77		>>> qg
78		QGrams({'A': 4, 'T': 4})
79
80		>>> qg = QGrams('AATTATAT', qval=3, start_stop='')
81		>>> qg
82		QGrams({'TAT': 2, 'AAT': 1, 'ATT': 1, 'TTA': 1, 'ATA': 1})
83
84		"""
85		# Save the term itself
86	1	self._term = term
87	1	self._term_ss = term
88	1	self._ordered_list = []
89
90	1	if not isinstance(qval, Iterable):
91	1	qval = (qval,)
92	1	if not isinstance(skip, Iterable):
93	1	skip = (skip,)
94
95	1	for qval_i in qval:
96	1	for skip_i in skip:
97	1	if len(self._term) < qval_i or qval_i < 1:
98	1	continue
99
100	1	if start_stop and qval_i > 1:
101	1	term = (
102		start_stop[0] * (qval_i - 1)
103		+ self._term
104		+ start_stop[-1] * (qval_i - 1)
105		)
106		else:
107	1	term = self._term
108
109		# Having appended start & stop symbols (or not), save the
110		# result, but only for the longest valid qval_i
111	1	if len(term) > len(self._term_ss):
112	1	self._term_ss = term
113
114	1	skip_i += 1
115	1	self._ordered_list += [
116		term[i : i + (qval_i * skip_i) : skip_i]
117		for i in range(len(term) - (qval_i - 1))
118		]
119
120	1	super(QGrams, self).__init__(self._ordered_list)
121
122	1	def count(self):
123		"""Return q-grams count.
124
125		Returns
126		-------
127		int
128		The total count of q-grams in a QGrams object
129
130		Examples
131		--------
132		>>> qg = QGrams('AATTATAT')
133		>>> qg.count()
134		9
135
136		>>> qg = QGrams('AATTATAT', qval=1, start_stop='')
137		>>> qg.count()
138		8
139
140		>>> qg = QGrams('AATTATAT', qval=3, start_stop='')
141		>>> qg.count()
142		6
143
144		"""
145	1	return sum(self.values())
146
147
148		if __name__ == '__main__':
149		import doctest
150
151		doctest.testmod()
152

chrislit / abydos

Push — master ( 3ac297...afe14d )

abydos.tokenizer._qgrams.QGrams.__init__() C

Complexity

Size

Duplication

Code Coverage

Importance

How to fix Long Method Complexity

Long Method

Complexity

Duplication Side-by-Side

Filter issues like

abydos.tokenizer._qgrams.QGrams.__init__() C