Completed
Push — master ( f43547...71985b )
by Chris
12:00 queued 10s
created

abydos.distance._bag.dist_bag()   A

Complexity

Conditions 1

Size

Total Lines 30
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
cc 1
eloc 2
nop 2
dl 0
loc 30
ccs 2
cts 2
cp 1
crap 1
rs 10
c 0
b 0
f 0
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2014-2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19 1
"""abydos.distance._bag.
20
21
Bag similarity & distance
22
"""
23
24 1
from __future__ import (
25
    absolute_import,
26
    division,
27
    print_function,
28
    unicode_literals,
29
)
30
31 1
from collections import Counter
32
33 1
from ._token_distance import _TokenDistance
34
35 1
__all__ = ['Bag', 'bag', 'dist_bag', 'sim_bag']
36
37
38 1
class Bag(_TokenDistance):
    """Bag distance.

    Bag distance is proposed in :cite:`Bartolini:2002`. It is defined as:
    :math:`max(|multiset(src)-multiset(tar)|, |multiset(tar)-multiset(src)|)`.
    """

    # NOTE(review): a static-analysis annotation attached to this method
    # reported "Parameters differ from overridden 'dist_abs' method". The base
    # class is not visible here -- confirm the signatures against
    # _TokenDistance before changing either side.
    def dist_abs(self, src, tar):
        """Return the bag distance between two strings.

        Parameters
        ----------
        src : str
            Source string for comparison
        tar : str
            Target string for comparison

        Returns
        -------
        int
            Bag distance

        Examples
        --------
        >>> cmp = Bag()
        >>> cmp.dist_abs('cat', 'hat')
        1
        >>> cmp.dist_abs('Niall', 'Neil')
        2
        >>> cmp.dist_abs('aluminum', 'Catalan')
        5
        >>> cmp.dist_abs('ATCG', 'TAGC')
        0
        >>> cmp.dist_abs('abcdefg', 'hijklm')
        7
        >>> cmp.dist_abs('abcdefg', 'hijklmno')
        8

        """
        # Trivial cases: identical inputs, or one side empty (the distance is
        # then simply the length of the other side).
        if tar == src:
            return 0
        elif not src:
            return len(tar)
        elif not tar:
            return len(src)

        src_bag = Counter(src)
        tar_bag = Counter(tar)
        # Counter subtraction keeps only positive counts, so each sum is the
        # size of one multiset difference; the distance is the larger of the
        # two.
        return max(
            sum((src_bag - tar_bag).values()),
            sum((tar_bag - src_bag).values()),
        )

    # NOTE(review): a static-analysis annotation attached to this method
    # reported "Parameters differ from overridden 'dist' method". The base
    # class is not visible here -- confirm against _TokenDistance.
    def dist(self, src, tar):
        """Return the normalized bag distance between two strings.

        Bag distance is normalized by dividing by :math:`max( |src|, |tar| )`.

        Parameters
        ----------
        src : str
            Source string for comparison
        tar : str
            Target string for comparison

        Returns
        -------
        float
            Normalized bag distance

        Examples
        --------
        >>> cmp = Bag()
        >>> cmp.dist('cat', 'hat')
        0.3333333333333333
        >>> cmp.dist('Niall', 'Neil')
        0.4
        >>> cmp.dist('aluminum', 'Catalan')
        0.625
        >>> cmp.dist('ATCG', 'TAGC')
        0.0

        """
        if tar == src:
            return 0.0
        # An empty side cannot share anything with the other one; returning
        # here also keeps the division below away from a zero-length case.
        if not src or not tar:
            return 1.0

        max_length = max(len(src), len(tar))

        return self.dist_abs(src, tar) / max_length
129
130
131 1
def bag(src, tar):
    """Return the bag distance between two strings.

    This is a wrapper for :py:meth:`Bag.dist_abs`.

    Parameters
    ----------
    src : str
        Source string for comparison
    tar : str
        Target string for comparison

    Returns
    -------
    int
        Bag distance

    Examples
    --------
    >>> bag('cat', 'hat')
    1
    >>> bag('Niall', 'Neil')
    2
    >>> bag('aluminum', 'Catalan')
    5
    >>> bag('ATCG', 'TAGC')
    0
    >>> bag('abcdefg', 'hijklm')
    7
    >>> bag('abcdefg', 'hijklmno')
    8

    """
    return Bag().dist_abs(src, tar)
165
166
167 1
def dist_bag(src, tar):
    """Return the normalized bag distance between two strings.

    This is a wrapper for :py:meth:`Bag.dist`.

    Parameters
    ----------
    src : str
        Source string for comparison
    tar : str
        Target string for comparison

    Returns
    -------
    float
        Normalized bag distance

    Examples
    --------
    >>> dist_bag('cat', 'hat')
    0.3333333333333333
    >>> dist_bag('Niall', 'Neil')
    0.4
    >>> dist_bag('aluminum', 'Catalan')
    0.625
    >>> dist_bag('ATCG', 'TAGC')
    0.0

    """
    return Bag().dist(src, tar)
197
198
199 1
def sim_bag(src, tar):
    """Return the normalized bag similarity of two strings.

    This is a wrapper for :py:meth:`Bag.sim`.

    Parameters
    ----------
    src : str
        Source string for comparison
    tar : str
        Target string for comparison

    Returns
    -------
    float
        Normalized bag similarity

    Examples
    --------
    >>> round(sim_bag('cat', 'hat'), 12)
    0.666666666667
    >>> sim_bag('Niall', 'Neil')
    0.6
    >>> sim_bag('aluminum', 'Catalan')
    0.375
    >>> sim_bag('ATCG', 'TAGC')
    1.0

    """
    # NOTE(review): Bag does not define sim() itself; presumably it is
    # inherited from the _TokenDistance base class -- confirm there.
    return Bag().sim(src, tar)
229
230
231
if __name__ == '__main__':
    # Run the docstring examples in this module when executed as a script.
    import doctest

    doctest.testmod()
235