Passed
Push — master ( 416c2f...9ec382 )
by Chris
01:03 queued 13s
created

abydos.fingerprint._count.Count.fingerprint()   A

Complexity

Conditions 1

Size

Total Lines 37
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 4
CRAP Score 1

Importance

Changes 0
Metric Value
eloc 3
dl 0
loc 37
ccs 4
cts 4
cp 1
rs 10
c 0
b 0
f 0
cc 1
nop 2
crap 1
1
# Copyright 2018-2020 by Christopher C. Little.
2
# This file is part of Abydos.
3
#
4
# Abydos is free software: you can redistribute it and/or modify
5
# it under the terms of the GNU General Public License as published by
6
# the Free Software Foundation, either version 3 of the License, or
7
# (at your option) any later version.
8
#
9
# Abydos is distributed in the hope that it will be useful,
10
# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
# GNU General Public License for more details.
13
#
14
# You should have received a copy of the GNU General Public License
15
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
16
17
"""abydos.fingerprint._count.
18
19 1
Cisłak & Grabowski's count fingerprint
20
"""
21
22
from collections import Counter
23
from typing import Tuple
24 1
25
from ._fingerprint import MOST_COMMON_LETTERS_CG, _Fingerprint
26
27
__all__ = ['Count']
28
29
30
class Count(_Fingerprint):
    """Count Fingerprint.

    Packs two-bit occurrence counts of the most common letters into a
    fixed-width bit field, following the count fingerprint of
    :cite:`Cislak:2017`.

    .. versionadded:: 0.3.6
    """

    def __init__(
        self,
        n_bits: int = 16,
        most_common: Tuple[str, ...] = MOST_COMMON_LETTERS_CG,
    ) -> None:
        """Initialize Count instance.

        Parameters
        ----------
        n_bits : int
            Number of bits in the fingerprint returned
        most_common : tuple
            The most common tokens in the target language, ordered by
            frequency; each token contributes two bits, in this order


        .. versionadded:: 0.4.0

        """
        super().__init__()
        self._most_common = most_common
        self._n_bits = n_bits

    def fingerprint(self, word: str) -> str:
        """Return the count fingerprint as a string of binary digits.

        The integer produced by :meth:`fingerprint_int` is rendered in
        base 2 and zero-padded on the left to at least ``n_bits`` digits.

        Parameters
        ----------
        word : str
            The word to fingerprint

        Returns
        -------
        str
            The count fingerprint

        Examples
        --------
        >>> cf = Count()
        >>> cf.fingerprint('hat')
        '0001010000000001'
        >>> cf.fingerprint('niall')
        '0000010001010000'
        >>> cf.fingerprint('colin')
        '0000000101010000'
        >>> cf.fingerprint('atcg')
        '0001010000000000'
        >>> cf.fingerprint('entreatment')
        '1111010000100000'


        .. versionadded:: 0.3.0
        .. versionchanged:: 0.3.6
            Encapsulated in class
        .. versionchanged:: 0.6.0
            Changed to return a str and added fingerprint_int method

        """
        value = self.fingerprint_int(word)
        # The nested replacement field supplies the zero-pad width.
        return '{:0{}b}'.format(value, self._n_bits)

    def fingerprint_int(self, word: str) -> int:
        """Return the count fingerprint as an integer.

        Each token of ``most_common`` is visited in order and the low two
        bits of its occurrence count in ``word`` (the count modulo 4) are
        appended to the fingerprint, most significant pair first. An odd
        ``n_bits`` is rounded up to the next even number so that only
        whole two-bit pairs are emitted.

        Parameters
        ----------
        word : str
            The word to fingerprint

        Returns
        -------
        int
            The count fingerprint as an int

        Examples
        --------
        >>> cf = Count()
        >>> cf.fingerprint_int('hat')
        5121
        >>> cf.fingerprint_int('niall')
        1104
        >>> cf.fingerprint_int('colin')
        336
        >>> cf.fingerprint_int('atcg')
        5120
        >>> cf.fingerprint_int('entreatment')
        62496


        .. versionadded:: 0.6.0

        """
        width = self._n_bits
        # Bits still to be filled; bumped to an even number of bits.
        remaining = width + 1 if width % 2 else width

        tally = Counter(word)
        value = 0

        for symbol in self._most_common:
            if not remaining:
                break
            # Append this symbol's count, truncated to two bits.
            value = (value << 2) + (tally[symbol] & 3)
            remaining -= 2

        # Ran out of symbols before filling the width: zero-fill the
        # low-order bits that were never assigned.
        return value << remaining if remaining else value
149
150
151
if __name__ == '__main__':
    # Run the docstring examples in this module when executed as a script.
    from doctest import testmod

    testmod()
155