Completed
Push — master ( 643512...2b6b3e )
by Chris
20:40 queued 10:36
created

abydos.distance._clark   A

Complexity

Total Complexity 3

Size/Duplication

Total Lines 118
Duplicated Lines 0 %

Test Coverage

Coverage 100%

Importance

Changes 0
Metric Value
wmc 3
eloc 28
dl 0
loc 118
ccs 15
cts 15
cp 1
rs 10
c 0
b 0
f 0

2 Methods

Rating   Name   Duplication   Size   Complexity  
A Clark.__init__() 0 13 1
A Clark.dist() 0 48 2
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2019 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19 1
"""abydos.distance._clark.
20
21
Clark's coefficient of divergence
22
"""
23
24 1
from __future__ import (
25
    absolute_import,
26
    division,
27
    print_function,
28
    unicode_literals,
29
)
30
31 1
from ._token_distance import _TokenDistance
32
33 1
__all__ = ['Clark']
34
35
36 1
class Clark(_TokenDistance):
    r"""Clark's coefficient of divergence.

    For two sets X and Y and a population N, Clark's coefficient of divergence
    :cite:`Clark:1952` is:

        .. math::

            dist_{Clark}(X, Y) = \sqrt{\frac{\sum_{i=0}^{|N|}
            \big(\frac{x_i-y_i}{x_i+y_i}\big)^2}{|N|}}

    In this implementation the sum runs over every distinct token that occurs
    in either string, and :math:`|N|` is the number of such tokens.

    .. versionadded:: 0.4.1
    """

    def __init__(self, **kwargs):
        """Initialize Clark instance.

        Parameters
        ----------
        **kwargs
            Arbitrary keyword arguments, passed through unchanged to the
            parent class initializer


        .. versionadded:: 0.4.1

        """
        super(Clark, self).__init__(**kwargs)

    def dist(self, src, tar):
        """Return Clark's coefficient of divergence of two strings.

        Parameters
        ----------
        src : str
            Source string for comparison
        tar : str
            Target string for comparison

        Returns
        -------
        float
            Clark's coefficient of divergence. This is 0.0 when the two
            strings are equal, and also when tokenization yields no tokens
            for either string.

        Examples
        --------
        >>> cmp = Clark()
        >>> cmp.dist('cat', 'hat')
        0.816496580927726
        >>> cmp.dist('Niall', 'Neil')
        0.8819171036881969
        >>> cmp.dist('aluminum', 'Catalan')
        0.9660917830792959
        >>> cmp.dist('ATCG', 'TAGC')
        1.0


        .. versionadded:: 0.4.1

        """
        # Identical inputs cannot diverge; skip tokenization entirely.
        if src == tar:
            return 0.0

        self._tokenize(src, tar)

        src_tok = self._src_tokens
        tar_tok = self._tar_tokens
        # Every distinct token seen in either string.
        # NOTE(review): the lookups below index both mappings with tokens
        # that may occur in only one of them, so the token mappings are
        # presumably Counter-like (missing keys count as 0) -- confirm
        # against _TokenDistance._tokenize.
        alphabet = set(src_tok) | set(tar_tok)

        # Distinct strings can still produce no tokens at all (depending on
        # the configured tokenizer); the mean below would then divide by
        # zero. Two empty token collections do not diverge.
        if not alphabet:
            return 0.0

        # Sum of squared relative differences of the per-token counts.
        squared_ratios = sum(
            ((src_tok[tok] - tar_tok[tok]) / (src_tok[tok] + tar_tok[tok]))
            ** 2
            for tok in alphabet
        )

        # Root of the mean squared relative difference.
        return (squared_ratios / len(alphabet)) ** 0.5
113
114
if __name__ == '__main__':
    # When this file is executed directly as a script, run the doctest
    # examples embedded in the docstrings of this module.
    from doctest import testmod

    testmod()
118