Passed
Push — master ( d2a11f...643512 )
by Chris
01:59 queued 12s
created

abydos.distance._vps   A

Complexity

Total Complexity 11

Size/Duplication

Total Lines 114
Duplicated Lines 0 %

Test Coverage

Coverage 100%

Importance

Changes 0
Metric Value
wmc 11
eloc 39
dl 0
loc 114
ccs 30
cts 30
cp 1
rs 10
c 0
b 0
f 0

1 Method

Rating   Name   Duplication   Size   Complexity  
C VPS.sim() 0 62 11
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2019 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19 1
"""abydos.distance._vps.
20
21
Victorian Panel Study (VPS) score
22
"""
23
24 1
from __future__ import (
25
    absolute_import,
26
    division,
27
    print_function,
28
    unicode_literals,
29
)
30
31 1
from collections import defaultdict
32
33 1
from ._distance import _Distance
34
35 1
__all__ = ['VPS']
36
37
38 1
class VPS(_Distance):
    """Victorian Panel Study (VPS) score.

    VPS score is presented in :cite:`Shurer:2007`.

    .. versionadded:: 0.4.1
    """

    def sim(self, src, tar):
        """Return the Victorian Panel Study score of two words.

        Identical words score 1.0 outright. Otherwise every substring of
        length 1, 2 or 3 of the longer word is looked up in the shorter
        word. Each occurrence in the longer word of a substring that the
        shorter word also contains earns one point. An occurrence that
        does not sit at the same offset in the shorter word is penalised
        by the distance to the nearest offset at which the shorter word
        has that substring; the summed penalty is divided by the length
        of the longer word before being subtracted. A non-zero result is
        finally divided by ``3 * len(longer) - 3``.

        Parameters
        ----------
        src : str
            Source string for comparison
        tar : str
            Target string for comparison

        Returns
        -------
        float
            The VPS score

        Examples
        --------
        >>> cmp = VPS()
        >>> cmp.sim('cat', 'hat')
        0.5
        >>> cmp.sim('Niall', 'Neil')
        0.3
        >>> cmp.sim('aluminum', 'Catalan')
        0.14285714285714285
        >>> cmp.sim('ATCG', 'TAGC')
        0.3333333333333333


        .. versionadded:: 0.4.1

        """
        if src == tar:
            return 1.0

        # Work with the words ordered by length; ties keep src as "longer".
        if len(src) < len(tar):
            longer, shorter = tar, src
        else:
            longer, shorter = src, tar

        def _substring_offsets(word):
            # Map each 1- to 3-character substring to its start offsets.
            offsets = defaultdict(set)
            for width in (1, 2, 3):
                for start in range(len(word) - width + 1):
                    offsets[word[start : start + width]].add(start)
            return offsets

        long_offsets = _substring_offsets(longer)
        short_offsets = _substring_offsets(shorter)

        matches = 0
        penalty = 0
        for substring, long_starts in long_offsets.items():
            # .get() avoids inserting empty entries into the defaultdict.
            short_starts = short_offsets.get(substring)
            if short_starts is None:
                continue
            # One point per occurrence in the longer word.
            matches += len(long_starts)
            # Misaligned occurrences pay the distance to the closest
            # occurrence in the shorter word.
            for start in long_starts - short_starts:
                penalty += min(abs(start - other) for other in short_starts)

        # The words differ here, so the longer one is never empty.
        result = matches - penalty / max(len(longer), len(shorter))
        if result:
            result /= 3 * len(longer) - 3

        return result
108
109
110
if __name__ == '__main__':
    # When executed as a script, run the doctest examples embedded in
    # this module's docstrings.
    from doctest import testmod

    testmod()
114