HuffmanCoding::encode()   A
last analyzed

Complexity

Conditions 3
Paths 3

Size

Total Lines 18
Code Lines 13

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 13
dl 0
loc 18
rs 9.8333
c 1
b 0
f 0
cc 3
nc 3
nop 2
1
<?php
2
3
namespace Mfonte\Base62x\Compression\Huffman;
4
5
use Mfonte\Base62x\Exception\CompressionException;
6
use Mfonte\Base62x\Compression\Huffman\Binary\BitStreamReader;
7
use Mfonte\Base62x\Compression\Huffman\Binary\BitStreamWriter;
8
9
/**
10
 *	Implementation of a Huffman Coding
11
 *	http://en.wikipedia.org/wiki/Huffman_coding.
12
 */
13
class HuffmanCoding
14
{
15
    public const SYMBOL_EOF = 'EOF';
16
17
    /**
18
     * 	create a code tree whose weights and symbols come from the sample indicated
19
     * 	NOTE: if a character isn't in this sample, it can't be encoded with the generated tree.
20
     */
21
    public static function createCodeTree($sample)
22
    {
23
        $weights = [];
24
        for ($i = 0; $i < mb_strlen($sample); ++$i) {
25
            if (!isset($weights[$sample[$i]])) {
26
                $weights[$sample[$i]] = 0;
27
            }
28
            ++$weights[$sample[$i]];
29
        }
30
        $weights[self::SYMBOL_EOF] = 1;	//	add the EOF marker to the encoding
31
32
        $queue = new HuffmanNodeQueue();
33
        arsort($weights);
34
        foreach ($weights as $symbol => $weight) {
35
            $queue->addNode(new HuffmanNode($symbol, $weight));
36
        }
37
38
        while ($nodes = $queue->popTwoNodes()) {
39
            $parentNode = HuffmanNode::join($nodes[0], $nodes[1]);
40
            $queue->addNode($parentNode);
41
        }
42
43
        return $queue->getOnlyNode();
44
    }
45
46
    /**
47
     * 	encode the given data using the Huffman tree.
48
     */
49
    public static function encode($data, HuffmanNode $codeTree)
50
    {
51
        $codeHash = [];
52
        $codeTree->getCodeHash($codeHash);
53
        $stream = new BitStreamWriter();
54
        for ($i = 0; $i < mb_strlen($data); ++$i) {
55
            $symbol = $data[$i];
56
            if (isset($codeHash[$symbol])) {
57
                $stream->writeString($codeHash[$symbol]);
58
            } else {
59
                throw new CompressionException('huffman', "NOTE: Cannot encode symbol {$symbol}. It was not found in the encoding tree.");
60
            }
61
        }
62
        $stream->writeString($codeHash[self::SYMBOL_EOF]);
63
        $encodedTree = (string) $codeTree;
64
        $encodedData = $stream->getData();
65
66
        return $encodedTree.$encodedData;
67
    }
68
69
    /**
70
     * 	decode the data using the code tree.
71
     */
72
    public static function decode($data)
73
    {
74
        $rootNode = HuffmanNode::loadFromString($data);
75
        $currentNode = $rootNode;
76
        $reader = new BitStreamReader($data);
77
        $decoded = '';
78
        while (true) {
79
            if ($currentNode->isLeaf()) {
80
                $nextSymbol = $currentNode->getSymbol();
81
                if ($nextSymbol === self::SYMBOL_EOF) {
82
                    return $decoded;
83
                } else {
84
                    $decoded .= $nextSymbol;
85
                    $currentNode = $rootNode;
86
                }
87
            } else {
88
                $bit = $reader->readBit();
89
                if ($bit === null) {
90
                    throw new CompressionException('huffman', 'Reached the end of the encoded data, but did not find the EOF symbol.');
91
                } else {
92
                    $currentNode = $bit
93
                        ? $currentNode->getRightChild()
94
                        : $currentNode->getLeftChild();
95
                }
96
            }
97
        }
98
    }
99
}
100