Completed
Push — master ( 643512...2b6b3e )
by Chris
20:40 queued 10:36
created

abydos.distance._henderson_heron   A

Complexity

Total Complexity 2

Size/Duplication

Total Lines 124
Duplicated Lines 0 %

Test Coverage

Coverage 100%

Importance

Changes 0
Metric Value
wmc 2
eloc 31
dl 0
loc 124
ccs 15
cts 15
cp 1
rs 10
c 0
b 0
f 0

2 Methods

Rating   Name   Duplication   Size   Complexity  
A HendersonHeron.dist() 0 49 1
A HendersonHeron.__init__() 0 13 1
# -*- coding: utf-8 -*-

# Copyright 2019 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.distance._henderson_heron.

Henderson-Heron dissimilarity
"""
23
24 1
from __future__ import (
25
    absolute_import,
26
    division,
27
    print_function,
28
    unicode_literals,
29
)
30
31 1
from math import factorial
32
33 1
from ._token_distance import _TokenDistance
34
35 1
# Names exported by ``from ... import *``: only the HendersonHeron class.
__all__ = ['HendersonHeron']
36
37
38 1
class HendersonHeron(_TokenDistance):
    r"""Henderson-Heron dissimilarity.

    For two sets X and Y and a population N, Henderson-Heron dissimilarity
    :cite:`Henderson:1977` is:

    .. math::

        dist_{Henderson-Heron}(X, Y) = \frac{|X|! |Y|! (|N| - |X|)!
        (|N|- |Y|)!}{|N|! |X \cap Y|! (|X| - |X \cap Y|)!
        (|Y| - |Y \cap X|)! (|N| - |X| - |Y| + |X \cap Y|)!}

    .. versionadded:: 0.4.1
    """

    def __init__(self, **kwargs):
        """Initialize HendersonHeron instance.

        Parameters
        ----------
        **kwargs
            Arbitrary keyword arguments, forwarded unchanged to the
            parent class initializer


        .. versionadded:: 0.4.1

        """
        super(HendersonHeron, self).__init__(**kwargs)

    def dist(self, src, tar):
        """Return the Henderson-Heron dissimilarity of two strings.

        Parameters
        ----------
        src : str
            Source string for comparison
        tar : str
            Target string for comparison

        Returns
        -------
        float
            Henderson-Heron dissimilarity

        Examples
        --------
        >>> cmp = HendersonHeron()
        >>> cmp.dist('cat', 'hat')
        0.00011668873858680838
        >>> cmp.dist('Niall', 'Neil')
        0.00048123075776606097
        >>> cmp.dist('aluminum', 'Catalan')
        0.08534181060514882
        >>> cmp.dist('ATCG', 'TAGC')
        0.9684367974410505


        .. versionadded:: 0.4.1

        """
        self._tokenize(src, tar)

        # Cardinalities reported by the parent class for the tokenized pair.
        shared = self._intersection_card()
        src_size = self._src_card()
        tar_size = self._tar_card()
        population = self._population_unique_card()

        # Numerator of the formula: |X|! |Y|! (|N| - |X|)! (|N| - |Y|)!
        numerator = 1
        for count in (
            src_size,
            tar_size,
            population - src_size,
            population - tar_size,
        ):
            numerator *= factorial(count)

        # Denominator: |N|! |X & Y|! (|X| - |X & Y|)! (|Y| - |X & Y|)!
        # (|N| - |X| - |Y| + |X & Y|)!
        denominator = 1
        for count in (
            population,
            shared,
            src_size - shared,
            tar_size - shared,
            population - tar_size - src_size + shared,
        ):
            denominator *= factorial(count)

        # Both products are accumulated as exact values; the only division
        # happens here, once, at the very end.
        return numerator / denominator
118
119
120
if __name__ == '__main__':
    # When executed as a script, run the doctest examples found in this
    # module's docstrings.
    from doctest import testmod

    testmod()
124