Completed
Push — master ( f43547...71985b )
by Chris
12:00 queued 10s
created

abydos.distance._ncd_zlib   A

Complexity

Total Complexity 5

Size/Duplication

Total Lines 176
Duplicated Lines 0 %

Test Coverage

Coverage 100%

Importance

Changes 0
Metric Value
eloc 37
dl 0
loc 176
ccs 27
cts 27
cp 1
rs 10
c 0
b 0
f 0
wmc 5

2 Methods

Rating   Name   Duplication   Size   Complexity  
A NCDzlib.__init__() 0 10 1
A NCDzlib.dist() 0 47 2

2 Functions

Rating   Name   Duplication   Size   Complexity  
A dist_ncd_zlib() 0 30 1
A sim_ncd_zlib() 0 29 1
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2014-2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19 1
"""abydos.distance._ncd_zlib.
20
21
NCD using zlib
22
"""
23
24 1
from __future__ import (
25
    absolute_import,
26
    division,
27
    print_function,
28
    unicode_literals,
29
)
30
31 1
import zlib
32
33 1
from ._distance import _Distance
34
35
36 1
# Names exported by ``from <this module> import *``: the distance class and
# its two function-style wrappers defined in this file.
__all__ = ['NCDzlib', 'dist_ncd_zlib', 'sim_ncd_zlib']
37
38
39 1
class NCDzlib(_Distance):
    """Normalized Compression Distance using zlib compression.

    Cf. https://zlib.net/

    Normalized compression distance (NCD) :cite:`Cilibrasi:2005`.
    """

    # zlib compression object reused by every call to dist(); it is
    # created (and primed) in __init__.
    _compressor = None

    def __init__(self, level=zlib.Z_DEFAULT_COMPRESSION):
        """Initialize zlib compressor.

        Parameters
        ----------
        level : int
            The compression level (0 to 9), or ``zlib.Z_DEFAULT_COMPRESSION``
            (the default) to let zlib choose

        """
        self._compressor = zlib.compressobj(level)
        # Emit and discard the stream header right away so that it is never
        # counted as part of the first string measured by dist().
        self._compressor.flush(zlib.Z_FULL_FLUSH)

    def _compressed_len(self, data):
        """Return the number of bytes the compressor emits for data.

        Parameters
        ----------
        data : bytes
            The byte string to compress

        Returns
        -------
        int
            Combined length of the output of ``compress`` and of the
            following full flush

        """
        # compress() may already return part of the compressed stream
        # (notably for large inputs), so its output has to be counted
        # together with what flush() returns.
        emitted = self._compressor.compress(data)
        # A full flush pushes out all pending output; per the zlib manual it
        # also resets the compression state, so the next measurement does
        # not build on this one.
        remainder = self._compressor.flush(zlib.Z_FULL_FLUSH)
        return len(emitted) + len(remainder)

    # NOTE(review): a static-analysis report flagged this signature as
    # differing from the overridden ``dist`` of the base class; the base
    # class is not visible here -- confirm against ``_Distance``.
    def dist(self, src, tar):
        """Return the NCD between two strings using zlib compression.

        Both strings are encoded as UTF-8. The compressed sizes of each
        string and of both concatenation orders are measured, and the
        distance is the smaller concatenated size minus the smaller
        individual size, divided by the larger individual size.

        Parameters
        ----------
        src : str
            Source string for comparison
        tar : str
            Target string for comparison

        Returns
        -------
        float
            Compression distance (0.0 when the two strings are equal)

        Examples
        --------
        >>> cmp = NCDzlib()
        >>> cmp.dist('cat', 'hat')
        0.3333333333333333
        >>> cmp.dist('Niall', 'Neil')
        0.45454545454545453
        >>> cmp.dist('aluminum', 'Catalan')
        0.5714285714285714
        >>> cmp.dist('ATCG', 'TAGC')
        0.4

        """
        if src == tar:
            return 0.0

        src = src.encode('utf-8')
        tar = tar.encode('utf-8')

        src_len = self._compressed_len(src)
        tar_len = self._compressed_len(tar)
        # Concatenation order can change the compressed size, so both
        # orders are measured and the smaller one is used.
        concat_len = min(
            self._compressed_len(src + tar), self._compressed_len(tar + src)
        )

        return (concat_len - min(src_len, tar_len)) / max(src_len, tar_len)
107
108
109 1
def dist_ncd_zlib(src, tar):
    """Compute the zlib-based NCD between two strings.

    Convenience wrapper: builds an :py:class:`NCDzlib` with its default
    settings and delegates to :py:meth:`NCDzlib.dist`.

    Parameters
    ----------
    src : str
        Source string for comparison
    tar : str
        Target string for comparison

    Returns
    -------
    float
        Compression distance

    Examples
    --------
    >>> dist_ncd_zlib('cat', 'hat')
    0.3333333333333333
    >>> dist_ncd_zlib('Niall', 'Neil')
    0.45454545454545453
    >>> dist_ncd_zlib('aluminum', 'Catalan')
    0.5714285714285714
    >>> dist_ncd_zlib('ATCG', 'TAGC')
    0.4

    """
    # A fresh instance is created for every call.
    measure = NCDzlib()
    return measure.dist(src, tar)
139
140
141 1
def sim_ncd_zlib(src, tar):
    """Compute the zlib-based NCD similarity between two strings.

    Convenience wrapper: builds an :py:class:`NCDzlib` with its default
    settings and delegates to :py:meth:`NCDzlib.sim`.

    Parameters
    ----------
    src : str
        Source string for comparison
    tar : str
        Target string for comparison

    Returns
    -------
    float
        Compression similarity

    Examples
    --------
    >>> sim_ncd_zlib('cat', 'hat')
    0.6666666666666667
    >>> sim_ncd_zlib('Niall', 'Neil')
    0.5454545454545454
    >>> sim_ncd_zlib('aluminum', 'Catalan')
    0.4285714285714286
    >>> sim_ncd_zlib('ATCG', 'TAGC')
    0.6

    """
    # A fresh instance is created for every call.
    measure = NCDzlib()
    return measure.sim(src, tar)
170
171
172
if __name__ == '__main__':
    # When the file is executed directly, run the examples embedded in the
    # docstrings of this module as doctests.
    from doctest import testmod

    testmod()
176