Completed
Branch master (78a222)
by Chris
14:36
created

abydos.compression._rle.rle_decode()   A

Complexity

Conditions 5

Size

Total Lines 47
Code Lines 14

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 14
CRAP Score 5

Importance

Changes 0
Metric Value
eloc 14
dl 0
loc 47
ccs 14
cts 14
cp 1
rs 9.2333
c 0
b 0
f 0
cc 5
nop 2
crap 5
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2014-2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19 1
"""abydos.compression._rle.
20
21
Run-Length Encoding encoder/decoder (rle_encode & rle_decode)
22
"""
23
24 1
from __future__ import unicode_literals
25
26 1
from itertools import groupby
27
28 1
from ._bwt import bwt_decode, bwt_encode
29
30
31 1
__all__ = ['rle_decode', 'rle_encode']
32
33
34 1
def rle_encode(text, use_bwt=True):
    r"""Perform encoding of run-length-encoding (RLE).

    Cf. :cite:`Robinson:1967`.

    Based on http://rosettacode.org/wiki/Run-length_encoding#Python
    :cite:`rosettacode:2018`. This is licensed GFDL 1.2.

    A run of three or more identical characters is written as its decimal
    length followed by the character; runs of one or two characters are
    copied unchanged. Digits 0-9 cannot be in text, since digits in the
    output are used for run lengths.

    :param str text: a text string to encode
    :param bool use_bwt: boolean indicating whether to perform BWT encoding
        before RLE encoding
    :returns: the run-length encoded text
    :rtype: str

    >>> rle_encode('align')
    'n\x00ilag'
    >>> rle_encode('align', use_bwt=False)
    'align'

    >>> rle_encode('banana')
    'annb\x00aa'
    >>> rle_encode('banana', use_bwt=False)
    'banana'

    >>> rle_encode('aaabaabababa')
    'ab\x00abbab5a'
    >>> rle_encode('aaabaabababa', False)
    '3abaabababa'
    """
    if use_bwt:
        text = bwt_encode(text)

    pieces = []
    for symbol, run in groupby(text):
        # Count the run without building a throwaway list of its members.
        length = sum(1 for _ in run)
        if length > 2:
            pieces.append(str(length) + symbol)
        else:
            # A count prefix would not shorten runs of length 1 or 2.
            pieces.append(length * symbol)
    return ''.join(pieces)
74
75
76 1
def rle_decode(text, use_bwt=True):
    r"""Perform decoding of run-length-encoding (RLE).

    Cf. :cite:`Robinson:1967`.

    Based on http://rosettacode.org/wiki/Run-length_encoding#Python
    :cite:`rosettacode:2018`. This is licensed GFDL 1.2.

    Digits 0-9 cannot have been in the original text. Only ASCII digits are
    read as run lengths; every other character, including non-ASCII digit
    characters, is treated as literal text. Digits at the very end of the
    input, with no character following them, are discarded.

    :param str text: a text string to decode
    :param bool use_bwt: boolean indicating whether to perform BWT decoding
        after RLE decoding
    :returns: the run-length decoded text
    :rtype: str

    >>> rle_decode('n\x00ilag')
    'align'
    >>> rle_decode('align', use_bwt=False)
    'align'

    >>> rle_decode('annb\x00aa')
    'banana'
    >>> rle_decode('banana', use_bwt=False)
    'banana'

    >>> rle_decode('ab\x00abbab5a')
    'aaabaabababa'
    >>> rle_decode('3abaabababa', False)
    'aaabaabababa'
    """
    # str.isdigit() also accepts characters such as superscript or
    # Arabic-Indic digits; those are never written as counts, and some of
    # them cannot be parsed by int(), so match ASCII digits explicitly.
    ascii_digits = frozenset('0123456789')

    count = ''
    decoded = []
    for char in text:
        if char in ascii_digits:
            # Accumulate a (possibly multi-digit) run length.
            count += char
        elif count:
            decoded.append(int(count) * char)
            count = ''
        else:
            decoded.append(char)

    text = ''.join(decoded)
    if use_bwt:
        text = bwt_decode(text)
    return text
123
124
125
if __name__ == '__main__':
    # Run the doctest examples embedded in this module's docstrings when the
    # file is executed directly as a script.
    import doctest

    doctest.testmod()
129