Completed
Pull Request — master (#141)
by Chris
11:04
created

abydos.distance._ncd_lzma   A

Complexity

Total Complexity 5

Size/Duplication

Total Lines 152
Duplicated Lines 0 %

Test Coverage

Coverage 45.45%

Importance

Changes 0
Metric Value
eloc 36
dl 0
loc 152
ccs 10
cts 22
cp 0.4545
rs 10
c 0
b 0
f 0
wmc 5

1 Method

Rating   Name   Duplication   Size   Complexity  
A NCDlzma.dist() 0 45 3

2 Functions

Rating   Name   Duplication   Size   Complexity  
A dist_ncd_lzma() 0 24 1
A sim_ncd_lzma() 0 24 1
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2014-2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19 1
"""abydos.distance._ncd_lzma.
20
21
NCD using lzma
22
"""
23
24 1
from __future__ import (
25
    unicode_literals,
26
    absolute_import,
27
    division,
28
    print_function,
29
)
30
31 1
from ._distance import _Distance
32
33 1
try:
34 1
    import lzma
35
except ImportError:  # pragma: no cover
36
    # If the system lacks the lzma library, that's fine, but lzma compression
37
    # similarity won't be supported.
38
    lzma = None
39
40 1
# Public names of this module: the NCD class and its two functional wrappers.
__all__ = ['NCDlzma', 'dist_ncd_lzma', 'sim_ncd_lzma']
41
42
43 1
class NCDlzma(_Distance):
    """Normalized Compression Distance using lzma compression.

    Normalized compression distance (NCD) :cite:`Cilibrasi:2005`.

    """

    # NOTE(review): static analysis reported that the parameters of this
    # method differ from the overridden ``dist`` method -- confirm against
    # ``_Distance.dist``, which is not visible from this module.
    def dist(self, src, tar):
        """Return the NCD between two strings using lzma compression.

        Identical inputs short-circuit to 0.0. Otherwise both strings are
        UTF-8 encoded, compressed individually and concatenated in both
        orders, and the distance is computed from the compressed lengths:
        (smaller concatenation - smaller single) / larger single.

        Args:
            src (str): Source string for comparison
            tar (str): Target string for comparison

        Returns:
            float: Compression distance

        Raises:
            ValueError: The module-level ``lzma`` name is None (message:
                Install the PylibLZMA module in order to use lzma)

        Examples:
            >>> cmp = NCDlzma()
            >>> cmp.dist('cat', 'hat')
            0.08695652173913043
            >>> cmp.dist('Niall', 'Neil')
            0.16
            >>> cmp.dist('aluminum', 'Catalan')
            0.16
            >>> cmp.dist('ATCG', 'TAGC')
            0.08695652173913043

        """
        if src == tar:
            return 0.0

        src_bytes = src.encode('utf-8')
        tar_bytes = tar.encode('utf-8')

        # Guard clause: without a compressor nothing below can run.
        if lzma is None:  # pragma: no cover
            raise ValueError(
                'Install the PylibLZMA module in order to use lzma'
            )

        def _payload_len(data):
            # The first 14 bytes of every compressed stream are dropped
            # before measuring -- presumably to discount fixed container
            # overhead; TODO confirm the choice of 14.
            return len(lzma.compress(data)[14:])

        src_len = _payload_len(src_bytes)
        tar_len = _payload_len(tar_bytes)

        # Compression is order-sensitive, so take the better of both orders.
        joined_len = min(
            _payload_len(src_bytes + tar_bytes),
            _payload_len(tar_bytes + src_bytes),
        )

        shorter, longer = sorted((src_len, tar_len))
        return (joined_len - shorter) / longer
94
95
96 1
def dist_ncd_lzma(src, tar):
    """Return the NCD between two strings using lzma compression.

    This is a wrapper for :py:meth:`NCDlzma.dist`: it builds a fresh
    :py:class:`NCDlzma` instance and delegates to it.

    Args:
        src (str): Source string for comparison
        tar (str): Target string for comparison

    Returns:
        float: Compression distance

    Examples:
        >>> dist_ncd_lzma('cat', 'hat')
        0.08695652173913043
        >>> dist_ncd_lzma('Niall', 'Neil')
        0.16
        >>> dist_ncd_lzma('aluminum', 'Catalan')
        0.16
        >>> dist_ncd_lzma('ATCG', 'TAGC')
        0.08695652173913043

    """
    measure = NCDlzma()
    return measure.dist(src, tar)
120
121
122 1
def sim_ncd_lzma(src, tar):
    """Return the NCD similarity between two strings using lzma compression.

    This is a wrapper for :py:meth:`NCDlzma.sim`: it builds a fresh
    :py:class:`NCDlzma` instance and delegates to its ``sim`` method
    (not defined on the class in this module; presumably inherited from
    ``_Distance`` -- verify there).

    Args:
        src (str): Source string for comparison
        tar (str): Target string for comparison

    Returns:
        float: Compression similarity

    Examples:
        >>> sim_ncd_lzma('cat', 'hat')
        0.9130434782608696
        >>> sim_ncd_lzma('Niall', 'Neil')
        0.84
        >>> sim_ncd_lzma('aluminum', 'Catalan')
        0.84
        >>> sim_ncd_lzma('ATCG', 'TAGC')
        0.9130434782608696

    """
    measure = NCDlzma()
    return measure.sim(src, tar)
146
147
148
if __name__ == '__main__':
    # Running this module as a script executes its docstring examples.
    from doctest import testmod

    testmod()
152