faytekin /
lzw
| 1 | <?php |
||
| 2 | |||
| 3 | namespace Faytekin\LZW; |
||
| 4 | |||
| 5 | class LZW |
||
| 6 | { |
||
| 7 | public function compress(string $uncompress): string |
||
| 8 | { |
||
| 9 | $dictSize = 256; |
||
| 10 | $dictionary = []; |
||
| 11 | |||
| 12 | for ($i = 0; $i < 256; $i++) { |
||
| 13 | $dictionary[$this->uniChr($i)] = $i; |
||
| 14 | } |
||
| 15 | |||
| 16 | $w = ''; |
||
| 17 | $result = []; |
||
| 18 | |||
| 19 | for ($i = 0; $i < strlen($uncompress); $i++) { |
||
| 20 | $c = $this->charAt($uncompress, $i); |
||
| 21 | $wc = $w.$c; |
||
| 22 | |||
| 23 | if (isset($dictionary[$wc])) { |
||
| 24 | $w = $wc; |
||
| 25 | } else { |
||
| 26 | array_push($result, $dictionary[$w]); |
||
| 27 | |||
| 28 | $dictionary[$wc] = $dictSize++; |
||
| 29 | $w = strval($c); |
||
| 30 | } |
||
| 31 | } |
||
| 32 | |||
| 33 | if ($w != '') { |
||
| 34 | array_push($result, $dictionary[$w]); |
||
| 35 | } |
||
| 36 | |||
| 37 | return implode(',', $result); |
||
| 38 | } |
||
| 39 | |||
| 40 | public function decompress(string $compressed): ?string |
||
| 41 | { |
||
| 42 | $compressed = explode(',', $compressed); |
||
| 43 | $dictSize = 256; |
||
| 44 | $dictionary = []; |
||
| 45 | |||
| 46 | for ($i = 1; $i < $dictSize; $i++) { |
||
| 47 | $dictionary[$i] = $this->uniChr($i); |
||
| 48 | } |
||
| 49 | |||
| 50 | $w = $this->uniChr($compressed[0]); |
||
| 51 | $result = $w; |
||
| 52 | |||
| 53 | for ($i = 1; $i < count($compressed); $i++) { |
||
|
0 ignored issues
–
show
|
|||
| 54 | $k = $compressed[$i]; |
||
| 55 | |||
| 56 | if (isset($dictionary[$k])) { |
||
| 57 | $entry = $dictionary[$k]; |
||
| 58 | } elseif ($k == $dictSize) { |
||
| 59 | $entry = $w.$this->charAt($w, 0); |
||
| 60 | } else { |
||
| 61 | return null; |
||
| 62 | } |
||
| 63 | |||
| 64 | $result .= $entry; |
||
| 65 | $dictionary[$dictSize++] = $w.$this->charAt($entry, 0); |
||
| 66 | $w = $entry; |
||
| 67 | } |
||
| 68 | |||
| 69 | return $result; |
||
| 70 | } |
||
| 71 | |||
| 72 | public function charAt(string $string, int $index): ?string |
||
| 73 | { |
||
| 74 | if ($index < mb_strlen($string)) { |
||
| 75 | return mb_substr($string, $index, 1, 'UTF-8'); |
||
| 76 | } |
||
| 77 | |||
| 78 | return null; |
||
| 79 | } |
||
| 80 | |||
| 81 | public function uniChr(int $u): string |
||
| 82 | { |
||
| 83 | return mb_convert_encoding('&#'.intval($u).';', 'UTF-8', 'HTML-ENTITIES'); |
||
| 84 | } |
||
| 85 | } |
||
| 86 |
If the size of the collection does not change during the iteration, it is generally a good practice to compute it beforehand, and not on each iteration: