1 | <?php |
||
2 | |||
3 | namespace Faytekin\LZW; |
||
4 | |||
5 | class LZW |
||
6 | { |
||
7 | public function compress(string $uncompress): string |
||
8 | { |
||
9 | $dictSize = 256; |
||
10 | $dictionary = []; |
||
11 | |||
12 | for ($i = 0; $i < 256; $i++) { |
||
13 | $dictionary[$this->uniChr($i)] = $i; |
||
14 | } |
||
15 | |||
16 | $w = ''; |
||
17 | $result = []; |
||
18 | |||
19 | for ($i = 0; $i < strlen($uncompress); $i++) { |
||
20 | $c = $this->charAt($uncompress, $i); |
||
21 | $wc = $w.$c; |
||
22 | |||
23 | if (isset($dictionary[$wc])) { |
||
24 | $w = $wc; |
||
25 | } else { |
||
26 | array_push($result, $dictionary[$w]); |
||
27 | |||
28 | $dictionary[$wc] = $dictSize++; |
||
29 | $w = strval($c); |
||
30 | } |
||
31 | } |
||
32 | |||
33 | if ($w != '') { |
||
34 | array_push($result, $dictionary[$w]); |
||
35 | } |
||
36 | |||
37 | return implode(',', $result); |
||
38 | } |
||
39 | |||
40 | public function decompress(string $compressed): ?string |
||
41 | { |
||
42 | $compressed = explode(',', $compressed); |
||
43 | $dictSize = 256; |
||
44 | $dictionary = []; |
||
45 | |||
46 | for ($i = 1; $i < $dictSize; $i++) { |
||
47 | $dictionary[$i] = $this->uniChr($i); |
||
48 | } |
||
49 | |||
50 | $w = $this->uniChr($compressed[0]); |
||
51 | $result = $w; |
||
52 | |||
53 | for ($i = 1; $i < count($compressed); $i++) { |
||
0 ignored issues
–
show
|
|||
54 | $k = $compressed[$i]; |
||
55 | |||
56 | if (isset($dictionary[$k])) { |
||
57 | $entry = $dictionary[$k]; |
||
58 | } elseif ($k == $dictSize) { |
||
59 | $entry = $w.$this->charAt($w, 0); |
||
60 | } else { |
||
61 | return null; |
||
62 | } |
||
63 | |||
64 | $result .= $entry; |
||
65 | $dictionary[$dictSize++] = $w.$this->charAt($entry, 0); |
||
66 | $w = $entry; |
||
67 | } |
||
68 | |||
69 | return $result; |
||
70 | } |
||
71 | |||
72 | public function charAt(string $string, int $index): ?string |
||
73 | { |
||
74 | if ($index < mb_strlen($string)) { |
||
75 | return mb_substr($string, $index, 1, 'UTF-8'); |
||
76 | } |
||
77 | |||
78 | return null; |
||
79 | } |
||
80 | |||
81 | public function uniChr(int $u): string |
||
82 | { |
||
83 | return mb_convert_encoding('&#'.intval($u).';', 'UTF-8', 'HTML-ENTITIES'); |
||
84 | } |
||
85 | } |
||
86 |
If the size of the collection does not change during the iteration, it is generally a good practice to compute it beforehand, and not on each iteration: