Passed
Push — master ( c2a3b6...15a61d )
by Chris
01:00 queued 14s
created

abydos.compression._arithmetic.ac_decode()   A

Complexity

Conditions 1

Size

Total Lines 39
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 4
CRAP Score 1

Importance

Changes 0
Metric Value
eloc 9
dl 0
loc 39
ccs 4
cts 4
cp 1
rs 9.95
c 0
b 0
f 0
cc 1
nop 3
crap 1
1
# Copyright 2014-2020 by Christopher C. Little.
2
# This file is part of Abydos.
3
#
4
# Abydos is free software: you can redistribute it and/or modify
5
# it under the terms of the GNU General Public License as published by
6
# the Free Software Foundation, either version 3 of the License, or
7
# (at your option) any later version.
8
#
9
# Abydos is distributed in the hope that it will be useful,
10
# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
# GNU General Public License for more details.
13
#
14
# You should have received a copy of the GNU General Public License
15
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
16
17
"""abydos.compression._arithmetic.
18
19 1
Arithmetic coder/decoder
20
"""
21
22
from collections import Counter
23
from fractions import Fraction
24 1
25
__all__ = ['Arithmetic']
26
27
28
class Arithmetic(object):
29
    """Arithmetic Coder.
30
31 1
    This is based on Andrew Dalke's public domain implementation
32 1
    :cite:`Dalke:2005`. It has been ported to use the fractions.Fraction class.
33
34 1
35
    .. versionadded:: 0.3.6
36 1
    """
37
38 1
    _probs = {}
39
40
    def __init__(self, text=None):
41
        """Initialize arithmetic coder object.
42
43 1
        Parameters
44
        ----------
45
        text : str
46 1
            The training text
47
48
49
        .. versionadded:: 0.3.6
50
51
        """
52
        if text is not None:
53
            self.train(text)
54
55
    def get_probs(self):
56 1
        """Return the probs dictionary.
57
58 1
        Returns
59
        -------
60
        dict
61
            The dictionary of probabilities
62
63
64
        .. versionadded:: 0.3.6
65
66
        """
67
        return self._probs
68
69
    def set_probs(self, probs):
70 1
        """Set the probs dictionary.
71 1
72
        Parameters
73 1
        ----------
74
        probs : dict
75
            The dictionary of probabilities
76
77
78
        .. versionadded:: 0.3.6
79
80
        """
81
        self._probs = probs
82
83
    def train(self, text):
84
        r"""Generate a probability dict from the provided text.
85 1
86
        Text to 0-order probability statistics as a dict
87 1
88
        Parameters
89
        ----------
90
        text : str
91
            The text data over which to calculate probability statistics. This
92
            must not contain the NUL (0x00) character because that is used to
93
            indicate the end of data.
94
95
        Example
96
        -------
97
        >>> ac = Arithmetic()
98
        >>> ac.train('the quick brown fox jumped over the lazy dog')
99 1
        >>> ac.get_probs()
100
        {' ': (Fraction(0, 1), Fraction(8, 45)),
101 1
         'o': (Fraction(8, 45), Fraction(4, 15)),
102
         'e': (Fraction(4, 15), Fraction(16, 45)),
103
         'u': (Fraction(16, 45), Fraction(2, 5)),
104
         't': (Fraction(2, 5), Fraction(4, 9)),
105
         'r': (Fraction(4, 9), Fraction(22, 45)),
106
         'h': (Fraction(22, 45), Fraction(8, 15)),
107
         'd': (Fraction(8, 15), Fraction(26, 45)),
108
         'z': (Fraction(26, 45), Fraction(3, 5)),
109
         'y': (Fraction(3, 5), Fraction(28, 45)),
110
         'x': (Fraction(28, 45), Fraction(29, 45)),
111
         'w': (Fraction(29, 45), Fraction(2, 3)),
112
         'v': (Fraction(2, 3), Fraction(31, 45)),
113
         'q': (Fraction(31, 45), Fraction(32, 45)),
114
         'p': (Fraction(32, 45), Fraction(11, 15)),
115
         'n': (Fraction(11, 15), Fraction(34, 45)),
116
         'm': (Fraction(34, 45), Fraction(7, 9)),
117
         'l': (Fraction(7, 9), Fraction(4, 5)),
118
         'k': (Fraction(4, 5), Fraction(37, 45)),
119
         'j': (Fraction(37, 45), Fraction(38, 45)),
120
         'i': (Fraction(38, 45), Fraction(13, 15)),
121
         'g': (Fraction(13, 15), Fraction(8, 9)),
122
         'f': (Fraction(8, 9), Fraction(41, 45)),
123
         'c': (Fraction(41, 45), Fraction(14, 15)),
124
         'b': (Fraction(14, 15), Fraction(43, 45)),
125
         'a': (Fraction(43, 45), Fraction(44, 45)),
126
         '\x00': (Fraction(44, 45), Fraction(1, 1))}
127
128
129
        .. versionadded:: 0.1.0
130
        .. versionchanged:: 0.3.6
131
            Encapsulated in class
132
133
        """
134
        if '\x00' in text:
135
            text = text.replace('\x00', ' ')
136
        counts = Counter(text)
137
        counts['\x00'] = 1
138
        tot_letters = sum(counts.values())
139
140
        tot = 0
141
        self._probs = {}
142
        prev = Fraction(0)
143
        for char, count in sorted(
144
            counts.items(), key=lambda x: (x[1], x[0]), reverse=True
145
        ):
146
            follow = Fraction(tot + count, tot_letters)
147
            self._probs[char] = (prev, follow)
148
            prev = follow
149
            tot = tot + count
150
151
    def encode(self, text):
152 1
        """Encode a text using arithmetic coding.
153 1
154 1
        Text and the 0-order probability statistics -> longval, nbits
155 1
156 1
        The encoded number is Fraction(longval, 2**nbits)
157 1
158
        Parameters
159 1
        ----------
160 1
        text : str
161 1
            A string to encode
162 1
163
        Returns
164
        -------
165 1
        tuple
166 1
            The arithmetically coded text
167 1
168 1
        Example
169
        -------
170 1
        >>> ac = Arithmetic('the quick brown fox jumped over the lazy dog')
171
        >>> ac.encode('align')
172
        (16720586181, 34)
173
174
175
        .. versionadded:: 0.1.0
176
        .. versionchanged:: 0.3.6
177
            Encapsulated in class
178
179
        """
180
        if '\x00' in text:
181
            text = text.replace('\x00', ' ')
182
        minval = Fraction(0)
183
        maxval = Fraction(1)
184
185
        for char in text + '\x00':
186
            prob_range = self._probs[char]
187
            delta = maxval - minval
188
            maxval = minval + prob_range[1] * delta
189
            minval = minval + prob_range[0] * delta
190
191
        # I tried without the /2 just to check.  Doesn't work.
192
        # Keep scaling up until the error range is >= 1.  That
193
        # gives me the minimum number of bits needed to resolve
194
        # down to the end-of-data character.
195
        delta = (maxval - minval) / 2
196
        nbits = int(0)
197
        while delta < 1:
198
            nbits += 1
199 1
            delta *= 2
200 1
        # The below condition shouldn't ever be false
201 1
        if nbits == 0:  # pragma: no cover
202 1
            return 0, 0
203 1
        # using -1 instead of /2
204
        avg = (maxval + minval) * 2 ** (nbits - 1)
205 1
        # Could return a rational instead ...
206 1
        # the division truncation is deliberate
207 1
        return avg.numerator // avg.denominator, nbits
208 1
209 1
    def decode(self, longval, nbits):
210
        """Decode the number to a string using the given statistics.
211
212
        Parameters
213
        ----------
214
        longval : int
215 1
            The first part of an encoded tuple from encode
216 1
        nbits : int
217 1
            The second part of an encoded tuple from encode
218 1
219 1
        Returns
220
        -------
221
        str
222
            The arithmetically decoded text
223
224 1
        Example
225
        -------
226
        >>> ac = Arithmetic('the quick brown fox jumped over the lazy dog')
227 1
        >>> ac.decode(16720586181, 34)
228
        'align'
229 1
230
231
        .. versionadded:: 0.1.0
232
        .. versionchanged:: 0.3.6
233
            Encapsulated in class
234
235
        """
236
        val = Fraction(longval, int(1) << nbits)
237
        letters = []
238
239
        probs_items = [
240
            (char, minval, maxval)
241
            for (char, (minval, maxval)) in self._probs.items()
242
        ]
243
244
        char = '\x00'
245
        while True:
246
            for (char, minval, maxval) in probs_items:  # noqa: B007
247
                if minval <= val < maxval:
248
                    break
249
250
            if char == '\x00':
251
                break
252
            letters.append(char)
253
            delta = maxval - minval
0 ignored issues
show
introduced by
The variable minval does not seem to be defined for all execution paths.
Loading history...
introduced by
The variable maxval does not seem to be defined for all execution paths.
Loading history...
254
            val = (val - minval) / delta
255
        return ''.join(letters)
256 1
257 1
258
if __name__ == '__main__':
259 1
    import doctest
260
261
    doctest.testmod(optionflags=doctest.NORMALIZE_WHITESPACE)
262