Passed
Push — master ( 416c2f...9ec382 )
by Chris
01:03 queued 13s
created

Position.fingerprint()   A

Complexity

Conditions 1

Size

Total Lines 37
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 5
CRAP Score 1

Importance

Changes 0
Metric Value
eloc 3
dl 0
loc 37
ccs 5
cts 5
cp 1
rs 10
c 0
b 0
f 0
cc 1
nop 2
crap 1
1
# Copyright 2018-2020 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
16
17
"""abydos.fingerprint._position.

Cisłak & Grabowski's position fingerprint
"""
21
22
from typing import Dict, Tuple
23
24 1
from ._fingerprint import MOST_COMMON_LETTERS_CG, _Fingerprint
25
26
__all__ = ['Position']
27
28
29
class Position(_Fingerprint):
    """Position Fingerprint.

    Based on the position fingerprint from :cite:`Cislak:2017`.

    The fingerprint is a bit string made of consecutive fixed-width fields,
    one per letter of ``most_common`` (in order). Each field stores the index
    of the first occurrence of that letter in the word, saturated at the
    largest value the field can hold; a letter that does not occur is encoded
    as the all-ones field. Bits left over after the last field are set to 1.

    .. versionadded:: 0.3.6
    """

    def __init__(
        self,
        n_bits: int = 16,
        most_common: Tuple[str, ...] = MOST_COMMON_LETTERS_CG,
        bits_per_letter: int = 3,
    ) -> None:
        """Initialize Position instance.

        Parameters
        ----------
        n_bits : int
            Number of bits in the fingerprint returned
        most_common : tuple
            The most common tokens in the target language, ordered by frequency
        bits_per_letter : int
            The number of bits used to encode the position of each letter


        .. versionadded:: 0.4.0

        """
        super().__init__()
        self._n_bits = n_bits
        self._most_common = most_common
        self._bits_per_letter = bits_per_letter

    def fingerprint(self, word: str) -> str:
        """Return the position fingerprint.

        Parameters
        ----------
        word : str
            The word to fingerprint

        Returns
        -------
        str
            The position fingerprint, as a string of binary digits
            zero-padded to ``n_bits`` characters

        Examples
        --------
        >>> pf = Position()
        >>> pf.fingerprint('hat')
        '1110100011111111'
        >>> pf.fingerprint('niall')
        '1111110101110010'
        >>> pf.fingerprint('colin')
        '1111111110010111'
        >>> pf.fingerprint('atcg')
        '1110010001111111'
        >>> pf.fingerprint('entreatment')
        '0000101011111111'


        .. versionadded:: 0.3.0
        .. versionchanged:: 0.3.6
            Encapsulated in class
        .. versionchanged:: 0.6.0
            Changed to return a str and added fingerprint_int method

        """
        # The nested replacement field supplies the zero-padded width.
        return '{:0{width}b}'.format(
            self.fingerprint_int(word), width=self._n_bits
        )

    def fingerprint_int(self, word: str) -> int:
        """Return the position fingerprint.

        Parameters
        ----------
        word : str
            The word to fingerprint

        Returns
        -------
        int
            The position fingerprint as an int

        Examples
        --------
        >>> pf = Position()
        >>> pf.fingerprint_int('hat')
        59647
        >>> pf.fingerprint_int('niall')
        64882
        >>> pf.fingerprint_int('colin')
        65431
        >>> pf.fingerprint_int('atcg')
        58495
        >>> pf.fingerprint_int('entreatment')
        2815


        .. versionadded:: 0.6.0

        """
        bits_left = self._n_bits
        bits_per_letter = self._bits_per_letter
        # Largest value a full-width letter field can hold. It is both the
        # saturation limit for positions and the "letter absent" marker.
        max_field = 2 ** bits_per_letter - 1

        # Index of the first occurrence of each tracked letter, saturated.
        position = {}  # type: Dict[str, int]
        for pos, letter in enumerate(word):
            if letter not in position and letter in self._most_common:
                position[letter] = min(pos, max_field)

        fingerprint = 0

        for letter in self._most_common:
            if not bits_left:
                break
            # The last field may be narrower than bits_per_letter when the
            # remaining bit budget runs short; its value is clamped to fit.
            width = min(bits_per_letter, bits_left)
            fingerprint <<= width
            fingerprint += min(position.get(letter, max_field), 2 ** width - 1)
            bits_left -= width

        # Bits not consumed by any letter field are all set to 1.
        if bits_left > 0:
            fingerprint = (fingerprint << bits_left) | ((1 << bits_left) - 1)

        return fingerprint
157
158
159
if __name__ == '__main__':
    # When executed as a script, run the doctest examples embedded in this
    # module's docstrings.
    import doctest

    doctest.testmod()
163