Passed
Push — master ( 416c2f...9ec382 )
by Chris
01:03 queued 13s
created

Occurrence.fingerprint()   A

Complexity

Conditions 1

Size

Total Lines 37
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 4
CRAP Score 1

Importance

Changes 0
Metric Value
eloc 3
dl 0
loc 37
ccs 4
cts 4
cp 1
rs 10
c 0
b 0
f 0
cc 1
nop 2
crap 1
1
# Copyright 2018-2020 by Christopher C. Little.
2
# This file is part of Abydos.
3
#
4
# Abydos is free software: you can redistribute it and/or modify
5
# it under the terms of the GNU General Public License as published by
6
# the Free Software Foundation, either version 3 of the License, or
7
# (at your option) any later version.
8
#
9
# Abydos is distributed in the hope that it will be useful,
10
# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
# GNU General Public License for more details.
13
#
14
# You should have received a copy of the GNU General Public License
15
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
16
17
"""abydos.fingerprint._occurrence.
18
19 1
Cisłak & Grabowski's occurrence fingerprint
20
"""
21
22
from typing import Tuple
23
24 1
from ._fingerprint import MOST_COMMON_LETTERS_CG, _Fingerprint
25
26
# Public names exported by this module (controls ``from ... import *``).
__all__ = ['Occurrence']
27
28
29
class Occurrence(_Fingerprint):
    """Occurrence Fingerprint.

    Based on the occurrence fingerprint from :cite:`Cislak:2017`.

    Each bit of the fingerprint records whether one particular letter occurs
    anywhere in the word; how often it occurs is not recorded.

    .. versionadded:: 0.3.6
    """

    def __init__(
        self,
        n_bits: int = 16,
        most_common: Tuple[str, ...] = MOST_COMMON_LETTERS_CG,
    ) -> None:
        """Initialize Occurrence instance.

        Parameters
        ----------
        n_bits : int
            Number of bits in the fingerprint returned
        most_common : tuple of str
            The most common tokens in the target language, ordered by frequency


        .. versionadded:: 0.4.0

        """
        super().__init__()
        self._n_bits = n_bits
        self._most_common = most_common

    def fingerprint(self, word: str) -> str:
        """Return the occurrence fingerprint.

        The result is the value of :meth:`fingerprint_int` rendered in binary
        and left-padded with zeros to ``n_bits`` characters.

        Parameters
        ----------
        word : str
            The word to fingerprint

        Returns
        -------
        str
            The occurrence fingerprint

        Examples
        --------
        >>> of = Occurrence()
        >>> of.fingerprint('hat')
        '0110000100000000'
        >>> of.fingerprint('niall')
        '0010110000100000'
        >>> of.fingerprint('colin')
        '0001110000110000'
        >>> of.fingerprint('atcg')
        '0110000000010000'
        >>> of.fingerprint('entreatment')
        '1110010010000100'


        .. versionadded:: 0.3.0
        .. versionchanged:: 0.3.6
            Encapsulated in class
        .. versionchanged:: 0.6.0
            Changed to return a str and added fingerprint_int method

        """
        # The nested field supplies the zero-padded width of the binary form.
        return '{:0{width}b}'.format(
            self.fingerprint_int(word), width=self._n_bits
        )

    def fingerprint_int(self, word: str) -> int:
        """Return the occurrence fingerprint.

        The first ``n_bits`` entries of ``most_common`` each contribute one
        bit, most significant bit first: the bit is 1 if that entry occurs in
        ``word`` and 0 otherwise. If ``most_common`` has fewer than ``n_bits``
        entries, the remaining low-order bits are 0. A non-positive ``n_bits``
        yields 0.

        Parameters
        ----------
        word : str
            The word to fingerprint

        Returns
        -------
        int
            The occurrence fingerprint as an int

        Examples
        --------
        >>> of = Occurrence()
        >>> of.fingerprint_int('hat')
        24832
        >>> of.fingerprint_int('niall')
        11296
        >>> of.fingerprint_int('colin')
        7216
        >>> of.fingerprint_int('atcg')
        24592
        >>> of.fingerprint_int('entreatment')
        58500


        .. versionadded:: 0.6.0

        """
        remaining = self._n_bits
        if remaining <= 0:
            # A zero-width (or nonsensical negative-width) fingerprint has no
            # bits to set.
            return 0

        # Build the membership set once rather than once per letter.
        present = set(word)

        fingerprint = 0
        for letter in self._most_common:
            if not remaining:
                break
            # Make room for the next bit, then record this letter's presence.
            fingerprint <<= 1
            if letter in present:
                fingerprint |= 1
            remaining -= 1

        # Zero-fill any bit positions that had no corresponding letter.
        return fingerprint << remaining
146
147
148
if __name__ == '__main__':
    # When run directly as a script, execute the doctest examples embedded in
    # this module's docstrings.
    import doctest

    doctest.testmod()
152