Passed
Push — master ( d2a11f...643512 )
by Chris
01:59 queued 12s
created

abydos.fingerprint._extract_position_frequency   A

Complexity

Total Complexity 4

Size/Duplication

Total Lines 130
Duplicated Lines 0 %

Test Coverage

Coverage 100%

Importance

Changes 0
Metric Value
wmc 4
eloc 56
dl 0
loc 130
ccs 18
cts 18
cp 1
rs 10
c 0
b 0
f 0

1 Method

Rating   Name   Duplication   Size   Complexity  
A ExtractPositionFrequency.fingerprint() 0 44 4
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2019 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19 1
"""abydos.fingerprint._extract_position_frequency.
20
21
Taft's extract - position & frequency coding
22
"""
23
24 1
from __future__ import (
25
    absolute_import,
26
    division,
27
    print_function,
28
    unicode_literals,
29
)
30
31 1
from ._fingerprint import _Fingerprint
32
33 1
# Public names exported by a star-import of this module.
__all__ = ['ExtractPositionFrequency']
34
35
36 1
class ExtractPositionFrequency(_Fingerprint):
    """Extract - Position & Frequency fingerprint.

    Based on the extract - position & frequency coding from :cite:`Taft:1970`.

    .. versionadded:: 0.4.1
    """

    # Frequency score for each uppercase ASCII letter. Any character that is
    # not a key of this table is dropped from the word before scoring.
    _frequency = dict(
        zip(
            'ABCDEFGHIJKLMNOPQRSTUVWXYZ',
            (
                5, 1, 5, 0, 7, 1, 2, 5, 6, 0, 1, 5, 1,  # A-M
                3, 4, 3, 0, 4, 5, 3, 4, 1, 1, 0, 2, 1,  # N-Z
            ),
        )
    )

    # Position score added to a letter according to the order in which it is
    # visited (see ``fingerprint``). Visits beyond the end of the table reuse
    # the final entry.
    _position = (0, 1, 2, 3, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 6, 7)

    def fingerprint(self, word):
        """Return the extract - position & frequency coding.

        Each retained letter is given a score equal to its frequency score
        plus a position score. The (up to) four letters with the lowest
        scores are kept and returned in their original order.

        Parameters
        ----------
        word : str
            The word to fingerprint

        Returns
        -------
        str
            The extract - position & frequency coding: at most four uppercase
            letters (an empty string if ``word`` contains no scored letters)

        Examples
        --------
        >>> fp = ExtractPositionFrequency()
        >>> fp.fingerprint('hat')
        'HAT'
        >>> fp.fingerprint('niall')
        'NILL'
        >>> fp.fingerprint('colin')
        'COLN'
        >>> fp.fingerprint('atcg')
        'ATCG'
        >>> fp.fingerprint('entreatment')
        'NMNT'


        .. versionadded:: 0.4.1

        """
        # Uppercase the input and keep only letters that have a frequency
        # score; everything else (digits, punctuation, ...) is ignored.
        letters = [char for char in word.upper() if char in self._frequency]
        length = len(letters)
        last_step = len(self._position) - 1

        # Letters are visited alternately from the front and the back of the
        # word, working inward: indices 0, n-1, 1, n-2, 2, ...  The position
        # score depends on the visiting step, not on the index itself.
        scored = []
        for step in range(length):
            half = step // 2
            index = half if step % 2 == 0 else length - 1 - half
            score = (
                self._frequency[letters[index]]
                + self._position[min(step, last_step)]
            )
            scored.append((score, index))

        # Keep the four lowest-scoring letters (ties resolved in favour of the
        # smaller index), then restore their original left-to-right order.
        kept = sorted(index for _, index in sorted(scored)[:4])

        return ''.join(letters[index] for index in kept)
124
125
126
if __name__ == '__main__':
    # When the module is executed directly, run the doctests embedded in its
    # docstrings.
    import doctest

    doctest.testmod()
130