Completed
Pull Request — master (#141)
by Chris
16:23
created

abydos.distance._ncd_lzma.NCDlzma.dist()   A

Complexity

Conditions 3

Size

Total Lines 53
Code Lines 16

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 1
CRAP Score 9.762

Importance

Changes 0
Metric Value
cc 3
eloc 16
nop 3
dl 0
loc 53
ccs 1
cts 11
cp 0.0909
crap 9.762
rs 9.6
c 0
b 0
f 0

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

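Commonly applied refactorings include Extract Method: move a cohesive group of statements out of the long method into a new, well-named method and call it from the original.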

1
# -*- coding: utf-8 -*-
2
3
# Copyright 2014-2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19 1
"""abydos.distance._ncd_lzma.
20
21
NCD using lzma
22
"""
23
24 1
from __future__ import (
25
    absolute_import,
26
    division,
27
    print_function,
28
    unicode_literals,
29
)
30
31 1
from ._distance import _Distance
32
33 1
try:
34 1
    import lzma
35
except ImportError:  # pragma: no cover
36
    # If the system lacks the lzma library, that's fine, but lzma compression
37
    # similarity won't be supported.
38
    lzma = None
39
40 1
__all__ = ['NCDlzma', 'dist_ncd_lzma', 'sim_ncd_lzma']
41
42
43 1
class NCDlzma(_Distance):
0 ignored issues
show
Unused Code introduced by
The variable __class__ seems to be unused.
Loading history...
44
    """Normalized Compression Distance using lzma compression.
45
46
    Normalized compression distance (NCD) :cite:`Cilibrasi:2005`.
47
    """
48
49 1
    def dist(self, src, tar):
0 ignored issues
show
Bug introduced by
Parameters differ from overridden 'dist' method
Loading history...
50
        """Return the NCD between two strings using lzma compression.
51
52
        Parameters
53
        ----------
54
        src : str
55
            Source string for comparison
56
        tar : str
57
            Target string for comparison
58
59
        Returns
60
        -------
61
        float
62
            Compression distance
63
64
        Raises
65
        ------
66
        ValueError
67
            Install the PylibLZMA module in order to use lzma
68
69
        Examples
70
        --------
71
        >>> cmp = NCDlzma()
72
        >>> cmp.dist('cat', 'hat')
73
        0.08695652173913043
74
        >>> cmp.dist('Niall', 'Neil')
75
        0.16
76
        >>> cmp.dist('aluminum', 'Catalan')
77
        0.16
78
        >>> cmp.dist('ATCG', 'TAGC')
79
        0.08695652173913043
80
81
        """
82
        if src == tar:
83
            return 0.0
84
85
        src = src.encode('utf-8')
86
        tar = tar.encode('utf-8')
87
88
        if lzma is not None:
89
            src_comp = lzma.compress(src)[14:]
90
            tar_comp = lzma.compress(tar)[14:]
91
            concat_comp = lzma.compress(src + tar)[14:]
92
            concat_comp2 = lzma.compress(tar + src)[14:]
93
        else:  # pragma: no cover
94
            raise ValueError(
95
                'Install the PylibLZMA module in order to use lzma'
96
            )
97
98
        return (
99
            min(len(concat_comp), len(concat_comp2))
100
            - min(len(src_comp), len(tar_comp))
101
        ) / max(len(src_comp), len(tar_comp))
102
103
104 1
def dist_ncd_lzma(src, tar):
105
    """Return the NCD between two strings using lzma compression.
106
107
    This is a wrapper for :py:meth:`NCDlzma.dist`.
108
109
    Parameters
110
    ----------
111
    src : str
112
        Source string for comparison
113
    tar : str
114
        Target string for comparison
115
116
    Returns
117
    -------
118
    float
119
        Compression distance
120
121
    Examples
122
    --------
123
    >>> dist_ncd_lzma('cat', 'hat')
124
    0.08695652173913043
125
    >>> dist_ncd_lzma('Niall', 'Neil')
126
    0.16
127
    >>> dist_ncd_lzma('aluminum', 'Catalan')
128
    0.16
129
    >>> dist_ncd_lzma('ATCG', 'TAGC')
130
    0.08695652173913043
131
132
    """
133
    return NCDlzma().dist(src, tar)
134
135
136 1
def sim_ncd_lzma(src, tar):
137
    """Return the NCD similarity between two strings using lzma compression.
138
139
    This is a wrapper for :py:meth:`NCDlzma.sim`.
140
141
    Parameters
142
    ----------
143
    src : str
144
        Source string for comparison
145
    tar : str
146
        Target string for comparison
147
148
    Returns
149
    -------
150
    float
151
        Compression similarity
152
153
    Examples
154
    --------
155
    >>> sim_ncd_lzma('cat', 'hat')
156
    0.9130434782608696
157
    >>> sim_ncd_lzma('Niall', 'Neil')
158
    0.84
159
    >>> sim_ncd_lzma('aluminum', 'Catalan')
160
    0.84
161
    >>> sim_ncd_lzma('ATCG', 'TAGC')
162
    0.9130434782608696
163
164
    """
165
    return NCDlzma().sim(src, tar)
166
167
168
if __name__ == '__main__':
169
    import doctest
170
171
    doctest.testmod()
172