1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
/** |
4
|
|
|
* Huffman.php |
5
|
|
|
* |
6
|
|
|
* PHP version 5 |
7
|
|
|
* |
8
|
|
|
* @category Dcrypt |
9
|
|
|
* @package Dcrypt |
10
|
|
|
* @author Michael Meyer (mmeyer2k) <[email protected]> |
11
|
|
|
* @license http://opensource.org/licenses/MIT The MIT License (MIT) |
12
|
|
|
* @link https://github.com/mmeyer2k/dcrypt |
13
|
|
|
*/ |
14
|
|
|
|
15
|
|
|
namespace Dcrypt; |
16
|
|
|
|
17
|
|
|
/** |
18
|
|
|
* Huffman compression functions. |
19
|
|
|
* |
20
|
|
|
* @category Dcrypt |
21
|
|
|
* @package Dcrypt |
22
|
|
|
* @author Michael Meyer (mmeyer2k) <[email protected]> |
23
|
|
|
* @license http://opensource.org/licenses/MIT The MIT License (MIT) |
24
|
|
|
* @link https://github.com/mmeyer2k/dcrypt |
25
|
|
|
* @link https://apigen.ci/github/mmeyer2k/dcrypt/namespace-Dcrypt.html |
26
|
|
|
*/ |
27
|
|
|
final class Huffman |
28
|
|
|
{ |
29
|
|
|
|
30
|
|
|
/** |
31
|
|
|
* Compress data |
32
|
|
|
* |
33
|
|
|
* @param string $data |
34
|
|
|
* @return string |
35
|
|
|
*/ |
36
|
|
|
public static function encode($data) |
37
|
|
|
{ |
38
|
|
|
$dictionary = self::frequencyMap($data); |
39
|
|
|
|
40
|
|
|
$binaryString = ''; |
41
|
|
|
|
42
|
|
|
$indexMap = self::createBinaryIndexes(count($dictionary)); |
43
|
|
|
|
44
|
|
|
// Map the binary data to the index |
45
|
|
|
foreach (str_split($data) as $d) { |
46
|
|
|
// Find character position in dictionary array |
47
|
|
|
$pos = array_search(base64_encode($d), array_keys($dictionary)); |
48
|
|
|
|
49
|
|
|
// Append the binary to the running string |
50
|
|
|
$binaryString = $binaryString . $indexMap[$pos]; |
51
|
|
|
} |
52
|
|
|
|
53
|
|
|
// Pad the string to the byte boundry |
54
|
|
|
while (strlen($binaryString) % 8 !== 0) { |
55
|
|
|
$binaryString = $binaryString . '0'; |
56
|
|
|
} |
57
|
|
|
|
58
|
|
|
// Chunk data into bytes |
59
|
|
|
$chunks = str_split($binaryString, 8); |
60
|
|
|
|
61
|
|
|
// Pack the binary string |
62
|
|
|
foreach ($chunks as $i => &$chunk) { |
63
|
|
|
$padded = str_pad($chunk, 8, '0', STR_PAD_RIGHT); |
64
|
|
|
$chunk = chr(bindec($padded)); |
|
|
|
|
65
|
|
|
} |
66
|
|
|
|
67
|
|
|
// Return compressed data with packed dictionary |
68
|
|
|
return self::packDictionary($dictionary) . implode($chunks); |
69
|
|
|
} |
70
|
|
|
|
71
|
|
|
/** |
72
|
|
|
* Decompress data |
73
|
|
|
* |
74
|
|
|
* @param string $data |
75
|
|
|
* @return string |
76
|
|
|
*/ |
77
|
|
|
public static function decode($data) |
78
|
|
|
{ |
79
|
|
|
$dictionary = self::unpackDictionary($data); |
80
|
|
|
|
81
|
|
|
// Remove dictionary bytes from beginning of data |
82
|
|
|
$data = str_split(substr($data, count($dictionary) * 2 + 1)); |
83
|
|
|
|
84
|
|
|
$binary = ''; |
85
|
|
|
|
86
|
|
|
// Convert data to binary |
87
|
|
|
while ($data) { |
|
|
|
|
88
|
|
|
$b = decbin(ord(array_shift($data))); |
89
|
|
|
|
90
|
|
|
// Pad to the left with zeros |
91
|
|
|
$b = str_pad($b, 8, '0', STR_PAD_LEFT); |
92
|
|
|
|
93
|
|
|
$binary .= $b; |
94
|
|
|
} |
95
|
|
|
|
96
|
|
|
$binary = str_split($binary); |
97
|
|
|
|
98
|
|
|
$pop = ''; |
99
|
|
|
$out = ''; |
100
|
|
|
|
101
|
|
|
while ($binary) { |
|
|
|
|
102
|
|
|
$pop .= array_shift($binary); |
103
|
|
|
if (isset($dictionary[$pop])) { |
104
|
|
|
$out .= $dictionary[$pop]; |
105
|
|
|
$pop = ''; |
|
|
|
|
106
|
|
|
} |
107
|
|
|
} |
108
|
|
|
|
109
|
|
|
return $out; |
110
|
|
|
} |
111
|
|
|
|
112
|
|
|
/** |
113
|
|
|
* Creates a packed dictionary. |
114
|
|
|
* |
115
|
|
|
* @param array $dictionary |
116
|
|
|
* @return string |
117
|
|
|
*/ |
118
|
|
|
private static function packDictionary(array $dictionary) |
119
|
|
|
{ |
120
|
|
|
// First byte will be the count of items in the dictionary |
121
|
|
|
$out = chr(count($dictionary)); |
122
|
|
|
|
123
|
|
|
// Get the binary index mapping |
124
|
|
|
$indexMap = self::createBinaryIndexTree(count($dictionary)); |
|
|
|
|
125
|
|
|
|
126
|
|
|
$dictionary = array_keys($dictionary); |
127
|
|
|
|
128
|
|
|
foreach ($dictionary as $idx => $char) { |
129
|
|
|
$out = $out . chr(bindec($indexMap[$idx])) . base64_decode($char); |
130
|
|
|
} |
131
|
|
|
|
132
|
|
|
return $out; |
133
|
|
|
} |
134
|
|
|
|
135
|
|
|
/** |
136
|
|
|
* Unpack dictionary |
137
|
|
|
* |
138
|
|
|
* @param $data |
139
|
|
|
* @return array |
140
|
|
|
*/ |
141
|
|
|
private static function unpackDictionary($data) |
142
|
|
|
{ |
143
|
|
|
// Get first byte which is dictionary size |
144
|
|
|
$count = ord(substr($data, 0, 1)); |
|
|
|
|
145
|
|
|
$packedBytes = str_split(substr($data, 1, $count * 2)); |
146
|
|
|
|
147
|
|
|
$out = []; |
148
|
|
|
|
149
|
|
|
while ($packedBytes) { |
|
|
|
|
150
|
|
|
$idx = array_shift($packedBytes); |
151
|
|
|
$val = array_shift($packedBytes); |
152
|
|
|
|
153
|
|
|
$out[decbin(ord($idx))] = $val; |
154
|
|
|
} |
155
|
|
|
|
156
|
|
|
return $out; |
157
|
|
|
} |
158
|
|
|
|
159
|
|
|
/** |
160
|
|
|
* Returns a bit mapping array like this: |
161
|
|
|
* |
162
|
|
|
* 100 |
163
|
|
|
* 101 |
164
|
|
|
* 111 |
165
|
|
|
* |
166
|
|
|
* @param $count |
167
|
|
|
* @return array |
168
|
|
|
*/ |
169
|
|
|
private static function createBinaryIndexes($count) |
170
|
|
|
{ |
171
|
|
|
// Start a counter to base our binary frame on |
172
|
|
|
$startOffset = 0; |
173
|
|
|
|
174
|
|
|
// Start a loop that we will manually break out of |
175
|
|
|
while (1) { |
176
|
|
|
$out = array(); |
177
|
|
|
|
178
|
|
|
foreach (range(1, $count) as $range) { |
179
|
|
|
$out[] = decbin($startOffset + $range); |
180
|
|
|
} |
181
|
|
|
|
182
|
|
|
// Make sure that no index is the prefix of any another |
183
|
|
|
$found = false; |
184
|
|
|
foreach ($out as $v1) { |
185
|
|
|
foreach ($out as $v2) { |
186
|
|
|
if ($v1 !== $v2 && starts_with($v1, $v2)) { |
187
|
|
|
$found = true; |
188
|
|
|
} |
189
|
|
|
} |
190
|
|
|
} |
191
|
|
|
|
192
|
|
|
// Once a mapping is accepted, return it as an array |
193
|
|
|
if (!$found) { |
194
|
|
|
return $out; |
195
|
|
|
} |
196
|
|
|
|
197
|
|
|
// ... otherwise, try again from the next highest position |
198
|
|
|
$startOffset = $startOffset + 1; |
199
|
|
|
} |
200
|
|
|
} |
201
|
|
|
|
202
|
|
|
/** |
203
|
|
|
* Map character frequency as array. Returns array like this: |
204
|
|
|
* |
205
|
|
|
* array( |
206
|
|
|
* |
207
|
|
|
* ) |
208
|
|
|
* |
209
|
|
|
* @param string $data |
210
|
|
|
* @return array |
211
|
|
|
*/ |
212
|
|
|
private static function frequencyMap($data) |
213
|
|
|
{ |
214
|
|
|
$occurences = array(); |
215
|
|
|
|
216
|
|
|
while (isset($data[0])) { |
217
|
|
|
// Count occurences for the first char and add to frequency map |
218
|
|
|
$occurences[base64_encode($data[0])] = substr_count($data, $data[0]); |
219
|
|
|
|
220
|
|
|
$data = str_replace($data[0], '', $data); |
221
|
|
|
} |
222
|
|
|
|
223
|
|
|
// Sort the resulting array |
224
|
|
|
asort($occurences); |
225
|
|
|
|
226
|
|
|
// Return the array in descending order |
227
|
|
|
return array_reverse($occurences); |
228
|
|
|
} |
229
|
|
|
} |
230
|
|
|
|
This check looks for multiple assignments in successive lines of code. It will report an issue if the operators are not in a straight line.
To visualize
will produce issues in the first and second line, while this second example
will produce no issues.