Passed
Push — master ( 416c2f...9ec382 )
by Chris
01:03 queued 13s
created

OccurrenceHalved.fingerprint_int()   B

Complexity

Conditions 7

Size

Total Lines 58
Code Lines 21

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 22
CRAP Score 7

Importance

Changes 0
Metric Value
eloc 21
dl 0
loc 58
ccs 22
cts 22
cp 1
rs 7.9759
c 0
b 0
f 0
cc 7
nop 2
crap 7

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method: moving a coherent part of the body into a new, well-named method.

1
# Copyright 2018-2020 by Christopher C. Little.
2
# This file is part of Abydos.
3
#
4
# Abydos is free software: you can redistribute it and/or modify
5
# it under the terms of the GNU General Public License as published by
6
# the Free Software Foundation, either version 3 of the License, or
7
# (at your option) any later version.
8
#
9
# Abydos is distributed in the hope that it will be useful,
10
# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
# GNU General Public License for more details.
13
#
14
# You should have received a copy of the GNU General Public License
15
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
16
17
"""abydos.fingerprint._occurrence_halved.
18
19 1
Cisłak & Grabowski's occurrence halved fingerprint
20
"""
21
22
from typing import Tuple
23
24 1
from ._fingerprint import MOST_COMMON_LETTERS_CG, _Fingerprint
25
26
__all__ = ['OccurrenceHalved']
27
28
29
class OccurrenceHalved(_Fingerprint):
    """Occurrence Halved Fingerprint.

    Based on the occurrence halved fingerprint from :cite:`Cislak:2017`.

    .. versionadded:: 0.3.6
    """

    def __init__(
        self,
        n_bits: int = 16,
        most_common: Tuple[str, ...] = MOST_COMMON_LETTERS_CG,
    ) -> None:
        """Initialize OccurrenceHalved instance.

        Parameters
        ----------
        n_bits : int
            Number of bits in the fingerprint returned
        most_common : tuple
            The most common tokens in the target language, ordered by frequency


        .. versionadded:: 0.4.0

        """
        super().__init__()
        self._n_bits = n_bits
        self._most_common = most_common

    def fingerprint(self, word: str) -> str:
        """Return the occurrence halved fingerprint.

        Based on the occurrence halved fingerprint from :cite:`Cislak:2017`.

        The result is the binary representation of :meth:`fingerprint_int`,
        zero-padded on the left to at least ``n_bits`` characters. Because
        each letter occupies two bits, an odd ``n_bits`` is rounded up by
        :meth:`fingerprint_int`, so the string may be one character longer
        than ``n_bits`` in that case.

        Parameters
        ----------
        word : str
            The word to fingerprint

        Returns
        -------
        str
            The occurrence halved fingerprint

        Examples
        --------
        >>> ohf = OccurrenceHalved()
        >>> ohf.fingerprint('hat')
        '0001010000000010'
        >>> ohf.fingerprint('niall')
        '0000010010100000'
        >>> ohf.fingerprint('colin')
        '0000001001010000'
        >>> ohf.fingerprint('atcg')
        '0010100000000000'
        >>> ohf.fingerprint('entreatment')
        '1111010000110000'


        .. versionadded:: 0.3.0
        .. versionchanged:: 0.3.6
            Encapsulated in class
        .. versionchanged:: 0.6.0
            Changed to return a str and added fingerprint_int method

        """
        # Minimum-width, zero-padded binary rendering of the integer form.
        return format(
            self.fingerprint_int(word), '0{}b'.format(self._n_bits)
        )

    def fingerprint_int(self, word: str) -> int:
        """Return the occurrence halved fingerprint.

        Based on the occurrence halved fingerprint from :cite:`Cislak:2017`.

        The word is split into two halves (the second half receives the
        extra character when the length is odd). For each of the most common
        letters, in order, two bits are emitted: the first is set if the
        letter occurs in the first half, the second if it occurs in the
        second half. An odd ``n_bits`` is rounded up to the next even number,
        and a non-positive ``n_bits`` yields 0.

        Parameters
        ----------
        word : str
            The word to fingerprint

        Returns
        -------
        int
            The occurrence halved fingerprint as an int

        Examples
        --------
        >>> ohf = OccurrenceHalved()
        >>> ohf.fingerprint_int('hat')
        5122
        >>> ohf.fingerprint_int('niall')
        1184
        >>> ohf.fingerprint_int('colin')
        592
        >>> ohf.fingerprint_int('atcg')
        10240
        >>> ohf.fingerprint_int('entreatment')
        62512


        .. versionadded:: 0.6.0

        """
        # Every letter consumes a pair of bits, so work with an even width.
        bits_left = self._n_bits
        if bits_left % 2:
            bits_left += 1

        midpoint = len(word) // 2
        first_half = set(word[:midpoint])
        second_half = set(word[midpoint:])

        fingerprint = 0
        for letter in self._most_common:
            # Test with <= 0 rather than truthiness: a negative width would
            # otherwise never reach zero and consume every letter.
            if bits_left <= 0:
                break
            fingerprint = (
                (fingerprint << 2)
                | ((letter in first_half) << 1)
                | (letter in second_half)
            )
            bits_left -= 2

        # Ran out of letters before filling the width: pad the low-order bits
        # with zeros so the letter pairs stay left-aligned.
        if bits_left > 0:
            fingerprint <<= bits_left

        return fingerprint
158
159
160
# Run this module's doctests when it is executed directly as a script.
if __name__ == '__main__':
    from doctest import testmod

    testmod()
164