Completed
Push — master ( f43547...71985b )
by Chris
12:00 queued 10s
created

abydos.fingerprint._count   A

Complexity

Total Complexity 6

Size/Duplication

Total Lines 136
Duplicated Lines 0 %

Test Coverage

Coverage 100%

Importance

Changes 0
Metric Value
eloc 29
dl 0
loc 136
ccs 22
cts 22
cp 1
rs 10
c 0
b 0
f 0
wmc 6

1 Function

Rating   Name   Duplication   Size   Complexity  
A count_fingerprint() 0 34 1

1 Method

Rating   Name   Duplication   Size   Complexity  
A Count.fingerprint() 0 50 5
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19 1
"""abydos.fingerprint._count.
20
21
Cisłak & Grabowski's count fingerprint
22
"""
23
24 1
from __future__ import (
25
    absolute_import,
26
    division,
27
    print_function,
28
    unicode_literals,
29
)
30
31 1
from collections import Counter
32
33 1
from ._fingerprint import MOST_COMMON_LETTERS_CG, _Fingerprint
34
35 1
__all__ = ['Count', 'count_fingerprint']
36
37
38 1
class Count(_Fingerprint):
    """Count Fingerprint.

    Based on the count fingerprint from :cite:`Cislak:2017`.

    Each letter of ``most_common`` contributes a 2-bit field holding the
    number of times that letter occurs in the word.
    """

    def fingerprint(self, word, n_bits=16, most_common=MOST_COMMON_LETTERS_CG):
        """Return the count fingerprint.

        Parameters
        ----------
        word : str
            The word to fingerprint
        n_bits : int
            Number of bits in the fingerprint returned (an odd value is
            rounded up to the next even number, since each letter uses
            two bits)
        most_common : list
            The most common tokens in the target language, ordered by frequency

        Returns
        -------
        int
            The count fingerprint

        Raises
        ------
        ValueError
            If ``n_bits`` is still negative after being rounded up to an
            even number

        Examples
        --------
        >>> cf = Count()
        >>> bin(cf.fingerprint('hat'))
        '0b1010000000001'
        >>> bin(cf.fingerprint('niall'))
        '0b10001010000'
        >>> bin(cf.fingerprint('colin'))
        '0b101010000'
        >>> bin(cf.fingerprint('atcg'))
        '0b1010000000000'
        >>> bin(cf.fingerprint('entreatment'))
        '0b1111010000100000'

        """
        # Every letter occupies two bits, so an odd width is rounded up.
        if n_bits % 2:
            n_bits += 1

        # Fail fast: a negative width would never count down to zero in the
        # loop below and would otherwise only surface later as an opaque
        # "negative shift count" error from the final shift.
        if n_bits < 0:
            raise ValueError(
                'n_bits must be non-negative (got {})'.format(n_bits)
            )

        letter_counts = Counter(word)
        fingerprint = 0

        for letter in most_common:
            if not n_bits:
                # The requested width has been filled.
                break
            # Append this letter's 2-bit field. The count is masked with 3,
            # i.e. taken modulo 4, so four occurrences encode like zero.
            # NOTE(review): confirm whether saturating at 3 was intended.
            fingerprint <<= 2
            fingerprint += letter_counts[letter] & 3
            n_bits -= 2

        # If most_common ran out before the width was filled, pad the
        # low-order end with zero bits.
        if n_bits:
            fingerprint <<= n_bits

        return fingerprint
94
95
96 1
def count_fingerprint(word, n_bits=16, most_common=MOST_COMMON_LETTERS_CG):
    """Compute the count fingerprint of a word.

    Convenience function that instantiates :py:class:`Count` and delegates
    to :py:meth:`Count.fingerprint`.

    Parameters
    ----------
    word : str
        The word to fingerprint
    n_bits : int
        Number of bits in the fingerprint returned
    most_common : list
        The most common tokens in the target language, ordered by frequency

    Returns
    -------
    int
        The count fingerprint

    Examples
    --------
    >>> bin(count_fingerprint('hat'))
    '0b1010000000001'
    >>> bin(count_fingerprint('niall'))
    '0b10001010000'
    >>> bin(count_fingerprint('colin'))
    '0b101010000'
    >>> bin(count_fingerprint('atcg'))
    '0b1010000000000'
    >>> bin(count_fingerprint('entreatment'))
    '0b1111010000100000'

    """
    fingerprinter = Count()
    return fingerprinter.fingerprint(
        word, n_bits=n_bits, most_common=most_common
    )
130
131
132
if __name__ == '__main__':
    # When executed as a script, run the doctests embedded in this module.
    from doctest import testmod

    testmod()
136