Completed
Push — master ( f43547...71985b )
by Chris
12:00 queued 10s
created

abydos.distance._ncd_bwtrle   A

Complexity

Total Complexity 4

Size/Duplication

Total Lines 158
Duplicated Lines 0 %

Test Coverage

Coverage 100%

Importance

Changes 0
Metric Value
eloc 29
dl 0
loc 158
ccs 19
cts 19
cp 1
rs 10
c 0
b 0
f 0
wmc 4

2 Functions

Rating   Name   Duplication   Size   Complexity  
A dist_ncd_bwtrle() 0 30 1
A sim_ncd_bwtrle() 0 30 1

1 Method

Rating   Name   Duplication   Size   Complexity  
A NCDbwtrle.dist() 0 40 2
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2014-2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19 1
"""abydos.distance._ncd_bwtrle.
20
21
NCD using BWT plus RLE
22
"""
23
24 1
from __future__ import (
25
    absolute_import,
26
    division,
27
    print_function,
28
    unicode_literals,
29
)
30
31 1
from ._ncd_rle import NCDrle
32 1
from ..compression import BWT
33
34
35 1
# Names exported by ``from ... import *``: the measure class and its two
# function-style wrappers defined in this module.
__all__ = ['NCDbwtrle', 'dist_ncd_bwtrle', 'sim_ncd_bwtrle']
36
37
38 1
class NCDbwtrle(NCDrle):
    """Normalized Compression Distance using BWT plus RLE.

    Every string is first passed through ``_bwt.encode`` and the result is
    then passed through ``_rle.encode``; only the lengths of the encoded
    strings enter the distance formula.

    Cf. https://en.wikipedia.org/wiki/Burrows-Wheeler_transform

    Normalized compression distance (NCD) :cite:`Cilibrasi:2005`.
    """

    # Single BWT encoder instance, created at class-definition time and
    # shared by all instances of this class.
    _bwt = BWT()

    def dist(self, src, tar):
        """Return the NCD between two strings using BWT plus RLE.

        Identical inputs short-circuit to ``0.0`` without encoding anything.
        Otherwise both concatenation orders (``src + tar`` and ``tar + src``)
        are encoded and the shorter encoding is used.

        Parameters
        ----------
        src : str
            Source string for comparison
        tar : str
            Target string for comparison

        Returns
        -------
        float
            Compression distance

        Examples
        --------
        >>> cmp = NCDbwtrle()
        >>> cmp.dist('cat', 'hat')
        0.75
        >>> cmp.dist('Niall', 'Neil')
        0.8333333333333334
        >>> cmp.dist('aluminum', 'Catalan')
        1.0
        >>> cmp.dist('ATCG', 'TAGC')
        0.8

        """
        if src == tar:
            return 0.0

        def _encoded_len(text):
            # NOTE(review): ``self._rle`` is not defined in this class; it is
            # presumably supplied by the NCDrle base class -- confirm.
            return len(self._rle.encode(self._bwt.encode(text)))

        src_len = _encoded_len(src)
        tar_len = _encoded_len(tar)
        forward_len = _encoded_len(src + tar)
        backward_len = _encoded_len(tar + src)

        shortest_concat = min(forward_len, backward_len)
        return (shortest_concat - min(src_len, tar_len)) / max(
            src_len, tar_len
        )
88
89
90 1
def dist_ncd_bwtrle(src, tar):
    """Return the NCD between two strings using BWT plus RLE.

    This is a wrapper for :py:meth:`NCDbwtrle.dist`: it builds a fresh
    :py:class:`NCDbwtrle` on every call and delegates to it.

    Parameters
    ----------
    src : str
        Source string for comparison
    tar : str
        Target string for comparison

    Returns
    -------
    float
        Compression distance

    Examples
    --------
    >>> dist_ncd_bwtrle('cat', 'hat')
    0.75
    >>> dist_ncd_bwtrle('Niall', 'Neil')
    0.8333333333333334
    >>> dist_ncd_bwtrle('aluminum', 'Catalan')
    1.0
    >>> dist_ncd_bwtrle('ATCG', 'TAGC')
    0.8

    """
    measure = NCDbwtrle()
    return measure.dist(src, tar)
120
121
122 1
def sim_ncd_bwtrle(src, tar):
    """Return the NCD similarity between two strings using BWT plus RLE.

    This is a wrapper for :py:meth:`NCDbwtrle.sim`: it builds a fresh
    :py:class:`NCDbwtrle` on every call and delegates to it.

    Parameters
    ----------
    src : str
        Source string for comparison
    tar : str
        Target string for comparison

    Returns
    -------
    float
        Compression similarity

    Examples
    --------
    >>> sim_ncd_bwtrle('cat', 'hat')
    0.25
    >>> sim_ncd_bwtrle('Niall', 'Neil')
    0.16666666666666663
    >>> sim_ncd_bwtrle('aluminum', 'Catalan')
    0.0
    >>> sim_ncd_bwtrle('ATCG', 'TAGC')
    0.19999999999999996

    """
    measure = NCDbwtrle()
    return measure.sim(src, tar)
152
153
154
if __name__ == '__main__':
    # When run as a script, execute the docstring examples in this module.
    from doctest import testmod

    testmod()
158