Completed
Push — master ( 643512...2b6b3e )
by Chris
20:40 queued 10:36
created

abydos.distance._raup_crick   A

Complexity

Total Complexity 3

Size/Duplication

Total Lines 137
Duplicated Lines 0 %

Test Coverage

Coverage 100%

Importance

Changes 0
Metric Value
wmc 3
eloc 35
dl 0
loc 137
ccs 19
cts 19
cp 1
rs 10
c 0
b 0
f 0

2 Methods

Rating   Name   Duplication   Size   Complexity  
A RaupCrick.__init__() 0 13 1
A RaupCrick.sim() 0 57 2
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2019 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19 1
"""abydos.distance._raup_crick.
20
21
Raup-Crick similarity
22
"""
23
24 1
from __future__ import (
25
    absolute_import,
26
    division,
27
    print_function,
28
    unicode_literals,
29
)
30
31 1
from math import factorial
32
33 1
from ._token_distance import _TokenDistance
34
35 1
__all__ = ['RaupCrick']
36
37
38 1
class RaupCrick(_TokenDistance):
    r"""Raup-Crick similarity.

    For two sets X and Y and a population N, Raup-Crick similarity
    :cite:`Raup:1979` is:

        .. math::

            sim_{Raup-Crick}(X, Y) = \sum_{i=0}^{|X \cap Y|}
            \frac{|X|! |Y|! (|N| - |X|)!
            (|N|- |Y|)!}{|N|! i! (|X| - i)!
            (|Y| - i)! (|N| - |X| - |Y| + i)!}

    Notes
    -----
    Observe that Raup-Crick similarity is related to Henderson-Heron similarity
    in that the former is the sum of all Henderson-Heron similarities for an
    intersection size ranging from 0 to the true intersection size.

    .. versionadded:: 0.4.1
    """

    def __init__(self, **kwargs):
        """Initialize RaupCrick instance.

        Parameters
        ----------
        **kwargs
            Arbitrary keyword arguments, forwarded unchanged to the
            parent class initializer


        .. versionadded:: 0.4.1

        """
        super(RaupCrick, self).__init__(**kwargs)

    def sim(self, src, tar):
        """Return the Raup-Crick similarity of two strings.

        Identical inputs short-circuit to 1.0 without tokenization.
        Otherwise the inputs are tokenized and the Henderson-Heron term is
        summed for every intersection size from 0 up to the observed one.

        Parameters
        ----------
        src : str
            Source string for comparison
        tar : str
            Target string for comparison

        Returns
        -------
        float
            Raup-Crick similarity

        Examples
        --------
        >>> cmp = RaupCrick()
        >>> cmp.sim('cat', 'hat')
        0.9999998002120004
        >>> cmp.sim('Niall', 'Neil')
        0.9999975146378747
        >>> cmp.sim('aluminum', 'Catalan')
        0.9968397599851411
        >>> cmp.sim('ATCG', 'TAGC')
        0.9684367974410505


        .. versionadded:: 0.4.1

        """
        if src == tar:
            return 1.0

        self._tokenize(src, tar)

        # Cardinalities as reported by the inherited helpers:
        # a = |X & Y|, ab = |X|, ac = |Y|, n = |N|.
        a = self._intersection_card()
        ab = self._src_card()
        ac = self._tar_card()
        n = self._population_unique_card()

        # These factors do not depend on the summation index, so compute
        # them once instead of once per term.  Everything here is exact
        # integer arithmetic, so each term's value is unchanged.
        numerator = (
            factorial(ab)
            * factorial(ac)
            * factorial(n - ab)
            * factorial(n - ac)
        )
        n_factorial = factorial(n)

        def _henderson_heron(i):
            # Henderson-Heron term for a hypothetical intersection size i.
            # NOTE(review): math.factorial rejects negative arguments, so
            # this presumes n >= ab + ac -- confirm the population helper
            # guarantees that.
            return numerator / (
                n_factorial
                * factorial(i)
                * factorial(ab - i)
                * factorial(ac - i)
                * factorial(n - ac - ab + i)
            )

        return sum(_henderson_heron(i) for i in range(a + 1))
131
132
133
if __name__ == '__main__':
    # Only when run as a script: execute the doctest examples embedded in
    # this module's docstrings.
    from doctest import testmod

    testmod()
137