abydos.distance._baulieu_xi.BaulieuXI.__init__()   A
last analyzed

Complexity

Conditions 1

Size

Total Lines 48
Code Lines 13

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
eloc 13
dl 0
loc 48
ccs 2
cts 2
cp 1
rs 9.75
c 0
b 0
f 0
cc 1
nop 5
crap 1
1
# Copyright 2019-2020 by Christopher C. Little.
2
# This file is part of Abydos.
3
#
4
# Abydos is free software: you can redistribute it and/or modify
5
# it under the terms of the GNU General Public License as published by
6
# the Free Software Foundation, either version 3 of the License, or
7
# (at your option) any later version.
8
#
9
# Abydos is distributed in the hope that it will be useful,
10
# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
# GNU General Public License for more details.
13
#
14
# You should have received a copy of the GNU General Public License
15
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
16
17
"""abydos.distance._baulieu_xi.
18
19 1
Baulieu XI distance
20
"""
21
22
from typing import Any, Counter as TCounter, Optional, Sequence, Set, Union
23
24 1
from ._token_distance import _TokenDistance
25
from ..tokenizer import _Tokenizer
26
27
# Names exported by ``from <module> import *``; only BaulieuXI is listed.
__all__ = ['BaulieuXI']
28
29
30
class BaulieuXI(_TokenDistance):
    r"""Baulieu XI distance.

    Given two sets X and Y and a population N, Baulieu XI distance
    :cite:`Baulieu:1997` is defined as

        .. math::

            dist_{BaulieuXI}(X, Y) = \frac{|X \setminus Y| + |Y \setminus X|}
            {|X \setminus Y| + |Y \setminus X| + |(N \setminus X) \setminus Y|}

    This is Baulieu's 29th dissimilarity coefficient. It does not satisfy
    Baulieu's (P4) property, which requires that
    :math:`D(a+1,b,c,d) \leq D(a,b,c,d)`, with equality holding iff
    :math:`D(a,b,c,d) = 0`.

    Expressed in :ref:`2x2 confusion table terms <confusion_table>`, where
    a+b+c+d=n, this is

        .. math::

            dist_{BaulieuXI} = \frac{b+c}{b+c+d}

    .. versionadded:: 0.4.0
    """

    def __init__(
        self,
        alphabet: Optional[
            Union[TCounter[str], Sequence[str], Set[str], int]
        ] = None,
        tokenizer: Optional[_Tokenizer] = None,
        intersection_type: str = 'crisp',
        **kwargs: Any
    ) -> None:
        """Initialize BaulieuXI instance.

        All arguments are passed through unchanged to the parent class
        initializer.

        Parameters
        ----------
        alphabet : Counter, collection, int, or None
            The alphabet of possible tokens.
            See :ref:`alphabet <alphabet>` description in
            :py:class:`_TokenDistance` for details.
        tokenizer : _Tokenizer
            A tokenizer instance from the :py:mod:`abydos.tokenizer` package
        intersection_type : str
            Selects the intersection type and, as a result, the set type.
            See :ref:`intersection_type <intersection_type>` description in
            :py:class:`_TokenDistance` for details.
        **kwargs
            Arbitrary keyword arguments

        Other Parameters
        ----------------
        qval : int
            The length of each q-gram. Supplying this together with
            tokenizer=None will cause the instance to use the QGram
            tokenizer with this q value.
        metric : _Distance
            A string distance measure class for use in the ``soft`` and
            ``fuzzy`` variants.
        threshold : float
            A threshold value, similarities above which are counted as
            members of the intersection for the ``fuzzy`` variant.


        .. versionadded:: 0.4.0

        """
        super().__init__(
            alphabet=alphabet,
            tokenizer=tokenizer,
            intersection_type=intersection_type,
            **kwargs
        )

    def dist(self, src: str, tar: str) -> float:
        """Return the Baulieu XI distance of two strings.

        Parameters
        ----------
        src : str
            Source string (or QGrams/Counter objects) for comparison
        tar : str
            Target string (or QGrams/Counter objects) for comparison

        Returns
        -------
        float
            Baulieu XI distance

        Examples
        --------
        >>> cmp = BaulieuXI()
        >>> cmp.dist('cat', 'hat')
        0.005115089514066497
        >>> cmp.dist('Niall', 'Neil')
        0.008951406649616368
        >>> cmp.dist('aluminum', 'Catalan')
        0.01913265306122449
        >>> cmp.dist('ATCG', 'TAGC')
        0.012755102040816327


        .. versionadded:: 0.4.0

        """
        # Equal inputs are at distance zero; skip tokenization entirely.
        if src == tar:
            return 0.0

        self._tokenize(src, tar)

        # b + c of the confusion table: source-only plus target-only counts.
        mismatched = self._src_only_card() + self._tar_only_card()
        # d of the confusion table: the total complement count.
        absent_from_both = self._total_complement_card()

        # A zero numerator means zero distance; returning here also keeps the
        # division below from ever seeing a zero denominator.
        if not mismatched:
            return 0.0
        return mismatched / (mismatched + absent_from_both)
147
148 1
149 1
if __name__ == '__main__':
    # When executed as a script, run the doctest examples in this module.
    from doctest import testmod

    testmod()
153