Completed
Push — master ( 643512...2b6b3e )
by Chris
20:40 queued 10:36
created

abydos.distance._baulieu_xi   A

Complexity

Total Complexity 4

Size/Duplication

Total Lines 157
Duplicated Lines 0 %

Test Coverage

Coverage 93.75%

Importance

Changes 0
Metric Value
wmc 4
eloc 32
dl 0
loc 157
ccs 15
cts 16
cp 0.9375
rs 10
c 0
b 0
f 0

2 Methods

Rating   Name   Duplication   Size   Complexity  
A BaulieuXI.__init__() 0 46 1
A BaulieuXI.dist() 0 42 3
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2019 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19 1
"""abydos.distance._baulieu_xi.
20
21
Baulieu XI distance
22
"""
23
24 1
from __future__ import (
25
    absolute_import,
26
    division,
27
    print_function,
28
    unicode_literals,
29
)
30
31 1
from ._token_distance import _TokenDistance
32
33 1
__all__ = ['BaulieuXI']
34
35
36 1
class BaulieuXI(_TokenDistance):
    r"""Baulieu XI distance.

    Given two sets X and Y drawn from a population N, Baulieu XI distance
    :cite:`Baulieu:1997` is defined as

        .. math::

            dist_{BaulieuXI}(X, Y) = \frac{|X \setminus Y| + |Y \setminus X|}
            {|X \setminus Y| + |Y \setminus X| + |(N \setminus X) \setminus Y|}

    This is the 29th of Baulieu's dissimilarity coefficients. It does not
    satisfy Baulieu's (P4) property, which requires that
    :math:`D(a+1,b,c,d) \leq D(a,b,c,d)`, with equality holding iff
    :math:`D(a,b,c,d) = 0`.

    Expressed in :ref:`2x2 confusion table terms <confusion_table>`, where
    a+b+c+d=n, the measure is

        .. math::

            dist_{BaulieuXI} = \frac{b+c}{b+c+d}

    .. versionadded:: 0.4.0
    """

    def __init__(
        self,
        alphabet=None,
        tokenizer=None,
        intersection_type='crisp',
        **kwargs
    ):
        """Initialize BaulieuXI instance.

        All arguments are forwarded unchanged to the
        :py:class:`_TokenDistance` initializer.

        Parameters
        ----------
        alphabet : Counter, collection, int, or None
            The alphabet of possible tokens.
            See the :ref:`alphabet <alphabet>` description in
            :py:class:`_TokenDistance` for details.
        tokenizer : _Tokenizer
            A tokenizer instance from the :py:mod:`abydos.tokenizer` package
        intersection_type : str
            Selects the intersection type, and thereby the set type:
            See the :ref:`intersection_type <intersection_type>` description
            in :py:class:`_TokenDistance` for details.
        **kwargs
            Arbitrary keyword arguments

        Other Parameters
        ----------------
        qval : int
            The length of each q-gram. Supplying this together with
            tokenizer=None makes the instance use the QGram tokenizer with
            this q value.
        metric : _Distance
            A string distance measure class used by the ``soft`` and
            ``fuzzy`` variants.
        threshold : float
            A threshold value; similarities above it are counted as
            members of the intersection in the ``fuzzy`` variant.


        .. versionadded:: 0.4.0

        """
        # Explicit two-argument super() keeps the module usable on Python 2.
        super(BaulieuXI, self).__init__(
            alphabet=alphabet,
            tokenizer=tokenizer,
            intersection_type=intersection_type,
            **kwargs
        )

    def dist(self, src, tar):
        """Return the Baulieu XI distance of two strings.

        Parameters
        ----------
        src : str
            Source string (or QGrams/Counter objects) for comparison
        tar : str
            Target string (or QGrams/Counter objects) for comparison

        Returns
        -------
        float
            Baulieu XI distance

        Examples
        --------
        >>> cmp = BaulieuXI()
        >>> cmp.dist('cat', 'hat')
        0.005115089514066497
        >>> cmp.dist('Niall', 'Neil')
        0.008951406649616368
        >>> cmp.dist('aluminum', 'Catalan')
        0.01913265306122449
        >>> cmp.dist('ATCG', 'TAGC')
        0.012755102040816327


        .. versionadded:: 0.4.0

        """
        # Equal inputs are at distance zero; skip tokenization entirely.
        if src == tar:
            return 0.0

        self._tokenize(src, tar)

        # b + c in confusion-table terms: tokens found in only one input.
        mismatched = self._src_only_card() + self._tar_only_card()
        # d in confusion-table terms: the total complement cardinality.
        neither = self._total_complement_card()

        # A zero numerator means zero distance; returning here also avoids
        # evaluating 0/0 should the complement cardinality be zero as well.
        if not mismatched:
            return 0.0

        # True division: the module imports `division` from __future__.
        return mismatched / (mismatched + neither)
151
152
153
if __name__ == '__main__':
    # When executed as a script, run the doctest examples embedded in this
    # module's docstrings.
    import doctest

    doctest.testmod()
157