Completed
Push — master ( f43547...71985b )
by Chris
12:00 queued 10s
created

abydos.fingerprint._occurrence_halved   A

Complexity

Total Complexity 8

Size/Duplication

Total Lines 144
Duplicated Lines 0 %

Test Coverage

Coverage 100%

Importance

Changes 0
Metric Value
eloc 35
dl 0
loc 144
ccs 27
cts 27
cp 1
rs 10
c 0
b 0
f 0
wmc 8

1 Method

Rating   Name   Duplication   Size   Complexity  
B OccurrenceHalved.fingerprint() 0 58 7

1 Function

Rating   Name   Duplication   Size   Complexity  
A occurrence_halved_fingerprint() 0 36 1
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19 1
"""abydos.fingerprint._occurrence_halved.
20
21
Cisłak & Grabowski's occurrence halved fingerprint
22
"""
23
24 1
from __future__ import (
25
    absolute_import,
26
    division,
27
    print_function,
28
    unicode_literals,
29
)
30
31 1
from ._fingerprint import MOST_COMMON_LETTERS_CG, _Fingerprint
32
33 1
__all__ = ['OccurrenceHalved', 'occurrence_halved_fingerprint']
34
35
36 1
class OccurrenceHalved(_Fingerprint):
    """Occurrence Halved Fingerprint.

    Based on the occurrence halved fingerprint from :cite:`Cislak:2017`.

    """

    def fingerprint(self, word, n_bits=16, most_common=MOST_COMMON_LETTERS_CG):
        """Return the occurrence halved fingerprint.

        Based on the occurrence halved fingerprint from :cite:`Cislak:2017`.

        The word is split into a first half (``len(word) // 2`` characters)
        and a second half (the remainder). Each token of ``most_common``, in
        order, contributes two bits: the higher bit is set if the token occurs
        in the first half, the lower bit if it occurs in the second half.

        Parameters
        ----------
        word : str
            The word to fingerprint
        n_bits : int
            Number of bits in the fingerprint returned. An odd value is
            rounded up to the next even value, because every token takes two
            bits. A value that is zero or negative yields a fingerprint of 0.
        most_common : list
            The most common tokens in the target language, ordered by frequency

        Returns
        -------
        int
            The occurrence halved fingerprint

        Examples
        --------
        >>> ohf = OccurrenceHalved()
        >>> bin(ohf.fingerprint('hat'))
        '0b1010000000010'
        >>> bin(ohf.fingerprint('niall'))
        '0b10010100000'
        >>> bin(ohf.fingerprint('colin'))
        '0b1001010000'
        >>> bin(ohf.fingerprint('atcg'))
        '0b10100000000000'
        >>> bin(ohf.fingerprint('entreatment'))
        '0b1111010000110000'

        """
        # Two bits are consumed per token, so the width must be even.
        if n_bits % 2:
            n_bits += 1

        w_len = len(word) // 2
        w_1 = set(word[:w_len])
        w_2 = set(word[w_len:])
        fingerprint = 0

        for letter in most_common:
            # Compare against zero rather than testing truthiness: a negative
            # width is truthy and would otherwise never stop the loop early,
            # producing more bits than were asked for.
            if n_bits <= 0:
                break
            fingerprint <<= 2
            if letter in w_1:
                fingerprint |= 2
            if letter in w_2:
                fingerprint |= 1
            n_bits -= 2

        # If most_common ran out before the width was filled, pad the
        # low-order end with zero bits so the result still spans n_bits.
        if n_bits > 0:
            fingerprint <<= n_bits

        return fingerprint
100
101
102 1
def occurrence_halved_fingerprint(
    word,
    n_bits=16,
    most_common=MOST_COMMON_LETTERS_CG,
):
    """Return the occurrence halved fingerprint.

    Functional convenience wrapper: it builds an :py:class:`OccurrenceHalved`
    instance and delegates to :py:meth:`OccurrenceHalved.fingerprint`.

    Parameters
    ----------
    word : str
        The word to fingerprint
    n_bits : int
        Number of bits in the fingerprint returned
    most_common : list
        The most common tokens in the target language, ordered by frequency

    Returns
    -------
    int
        The occurrence halved fingerprint

    Examples
    --------
    >>> bin(occurrence_halved_fingerprint('hat'))
    '0b1010000000010'
    >>> bin(occurrence_halved_fingerprint('niall'))
    '0b10010100000'
    >>> bin(occurrence_halved_fingerprint('colin'))
    '0b1001010000'
    >>> bin(occurrence_halved_fingerprint('atcg'))
    '0b10100000000000'
    >>> bin(occurrence_halved_fingerprint('entreatment'))
    '0b1111010000110000'

    """
    fingerprinter = OccurrenceHalved()
    return fingerprinter.fingerprint(word, n_bits, most_common)
138
139
140
# Run this module's docstring examples when it is executed as a script.
if __name__ == '__main__':
    from doctest import testmod

    testmod()
144