Passed
Push — master ( d2a11f...643512 )
by Chris
01:59 queued 12s
created

abydos.fingerprint._extract   A

Complexity

Total Complexity 8

Size/Duplication

Total Lines 127
Duplicated Lines 0 %

Test Coverage

Coverage 100%

Importance

Changes 0
Metric Value
wmc 8
eloc 35
dl 0
loc 127
ccs 23
cts 23
cp 1
rs 10
c 0
b 0
f 0

2 Methods

Rating   Name   Duplication   Size   Complexity  
A Extract.__init__() 0 30 4
A Extract.fingerprint() 0 45 4
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2019 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19 1
"""abydos.fingerprint._extract.
20
21
Taft's extract letter list coding
22
"""
23
24 1
from __future__ import (
25
    absolute_import,
26
    division,
27
    print_function,
28
    unicode_literals,
29
)
30
31 1
from ._fingerprint import _Fingerprint
32
33 1
__all__ = ['Extract']
34
35
36 1
class Extract(_Fingerprint):
    """Extract Letter List fingerprint.

    Based on the extract letter list coding from :cite:`Taft:1970`,
    for lists 1, 2, 3, & 4.

    .. versionadded:: 0.4.1
    """

    def __init__(self, letter_list=1):
        """Initialize Extract instance.

        Parameters
        ----------
        letter_list : int or iterable
            If an integer (1-4) is supplied, Taft's specified letter lists are
            used. If an indexable iterable (anything defining
            ``__getitem__``, such as a string, list, or tuple) is supplied,
            its values will be used as the list of letters to remove (in
            order). Any other value silently falls back to letter list 1.


        .. versionadded:: 0.4.1

        """
        # Taft's four letter lists; each gives the order in which letters
        # are removed from a word.
        letter_lists = (
            'ETAONRISHDLFCMUGYPWBVKXJQZ',
            'ETASIONRHCDLPMFBUWGYKVJQZX',
            'ETAONISRHLDCUMFYWGPKBVXJQZ',
            'EARNLOISTHDMCBGUWYJKPFVZXQ',
        )

        # Name this class (not its parent) so that the MRO lookup starts at
        # _Fingerprint and the parent's initializer is not skipped.
        super(Extract, self).__init__()

        if isinstance(letter_list, int) and 1 <= letter_list <= 4:
            self._letter_list = list(letter_lists[letter_list - 1])
        elif hasattr(letter_list, '__getitem__'):
            # TODO: revert the above to __iter__ after removing Py2.7 support
            self._letter_list = list(letter_list)
        else:
            self._letter_list = list(letter_lists[0])

    def fingerprint(self, word):
        """Return the extract letter list coding.

        The word is uppercased, then letters are deleted in the order given
        by the letter list (rightmost occurrences first) until only four
        characters remain. Words of four or fewer characters are returned
        uppercased but otherwise unchanged. If the letter list is exhausted
        first, the result may be longer than four characters.

        Parameters
        ----------
        word : str
            The word to fingerprint

        Returns
        -------
        str
            The extract letter list coding

        Examples
        --------
        >>> fp = Extract()
        >>> fp.fingerprint('hat')
        'HAT'
        >>> fp.fingerprint('niall')
        'NILL'
        >>> fp.fingerprint('colin')
        'CLIN'
        >>> fp.fingerprint('atcg')
        'ATCG'
        >>> fp.fingerprint('entreatment')
        'NRMN'


        .. versionadded:: 0.4.1

        """
        # Uppercase & reverse: str.replace() drops the leftmost matches
        # first, so working on the reversed word removes the rightmost
        # occurrences of each letter in the original word first.
        word = word.upper()[::-1]

        for letter in self._letter_list:  # pragma: no branch
            if len(word) < 5:
                break

            # Remove at most as many occurrences as would leave exactly four
            # characters; a letter absent from the word is a no-op.
            word = word.replace(letter, '', len(word) - 4)

        # Undo the reversal to restore the original letter order.
        return word[::-1]
121
122
123
if __name__ == '__main__':
    # Run this module's docstring examples when executed as a script.
    from doctest import testmod

    testmod()
127