| Total Complexity | 49 |
| Total Lines | 300 |
| Duplicated Lines | 0 % |
| Changes | 1 |
| Bugs | 0 | Features | 0 |
Complex classes like Decoder often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Decoder, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 8 | class Decoder |
||
| 9 | { |
||
| 10 | |||
/** Handle forwarded to Util::read() for every read this decoder performs. */
private $fileStream;

/** Value added to every decoded pointer (see decodePointer()). */
private $pointerBase;

/**
 * This is only used for unit testing: when truthy, decode() returns the
 * computed pointer instead of following it.
 */
private $pointerTestHack;

/**
 * Set from isPlatformLittleEndian() in the constructor.
 * NOTE(review): presumably consulted by maybeSwitchByteOrder() - confirm.
 */
private $switchByteOrder;
||
| 16 | |||
/**
 * Type names indexed by type number.
 *
 * decode() resolves numbers 0-7 straight from the top three bits of the
 * control byte; higher numbers are reached through the 'extended' marker,
 * where the type number is the following byte plus 7.
 */
private $types = array(
    0 => 'extended',
    1 => 'pointer',
    2 => 'utf8_string',
    3 => 'double',
    4 => 'bytes',
    5 => 'uint16',
    6 => 'uint32',
    7 => 'map',
    8 => 'int32',
    9 => 'uint64',
    10 => 'uint128',
    11 => 'array',
    12 => 'container',
    13 => 'end_marker',
    14 => 'boolean',
    15 => 'float',
);
||
| 35 | |||
/**
 * Creates a decoder that reads through the given handle.
 *
 * @param mixed $fileStream      handle forwarded to Util::read()
 * @param int   $pointerBase     value added to every decoded pointer
 * @param bool  $pointerTestHack unit-testing switch: when truthy, decode()
 *                               returns pointers without following them
 */
public function __construct(
    $fileStream,
    $pointerBase = 0,
    $pointerTestHack = false
) {
    // Detect the platform byte order once, up front.
    $this->switchByteOrder = $this->isPlatformLittleEndian();

    $this->pointerTestHack = $pointerTestHack;
    $this->pointerBase = $pointerBase;
    $this->fileStream = $fileStream;
}
||
| 47 | |||
| 48 | |||
/**
 * Decodes the data field whose control byte sits at $offset.
 *
 * The top three bits of the control byte select the type. Type 0
 * ('extended') means the real type number is the next byte plus 7; type 1
 * ('pointer') is resolved by decoding the field the pointer refers to.
 *
 * @param int $offset position of the field's control byte
 *
 * @return array array($value, $nextOffset). For a pointer field,
 *               $nextOffset is the position just after the pointer itself,
 *               not after the pointed-to data. In pointer test-hack mode a
 *               pointer field yields array($pointer) only.
 *
 * @throws InvalidDatabaseException when an extended type byte resolves to
 *                                  a type number below 8 or to a number
 *                                  with no entry in the type table
 */
public function decode($offset)
{
    list(, $ctrlByte) = unpack(
        'C',
        Util::read($this->fileStream, $offset, 1)
    );
    $offset++;

    // $ctrlByte is 0-255, so the shifted value is always 0-7 and always
    // present in the type table.
    $type = $this->types[$ctrlByte >> 5];

    // Pointers are a special case, we don't read the next $size bytes, we
    // use the size to determine the length of the pointer and then follow
    // it.
    if ($type === 'pointer') {
        list($pointer, $offset) = $this->decodePointer($ctrlByte, $offset);

        // for unit testing
        if ($this->pointerTestHack) {
            return array($pointer);
        }

        list($result) = $this->decode($pointer);

        return array($result, $offset);
    }

    if ($type === 'extended') {
        list(, $nextByte) = unpack(
            'C',
            Util::read($this->fileStream, $offset, 1)
        );

        $typeNum = $nextByte + 7;

        if ($typeNum < 8) {
            throw new InvalidDatabaseException(
                "Something went horribly wrong in the decoder. An extended type "
                . "resolved to a type number < 8 ("
                . $this->types[$typeNum]
                . ")"
            );
        }

        // $nextByte can be as large as 255, but the type table stops at
        // 15. Fail explicitly instead of reading an undefined index and
        // carrying a null type into decodeByType().
        if (!isset($this->types[$typeNum])) {
            throw new InvalidDatabaseException(
                "Unknown or unexpected extended type number: " . $typeNum
            );
        }

        $type = $this->types[$typeNum];
        $offset++;
    }

    list($size, $offset) = $this->sizeFromCtrlByte($ctrlByte, $offset);

    return $this->decodeByType($type, $offset, $size);
}
||
| 100 | |||
/**
 * Dispatches to the decoder for $type.
 *
 * Maps, arrays and booleans are handled first because they do not consume
 * $size bytes of payload: for maps and arrays $size is an entry count, and
 * for booleans $size itself is the value. Every other type reads $size
 * bytes starting at $offset.
 *
 * @param string $type   type name taken from the type table
 * @param int    $offset position of the first payload byte
 * @param int    $size   size decoded from the control byte
 *
 * @return array array($value, $nextOffset)
 *
 * @throws InvalidDatabaseException for any type without a decoder here
 */
private function decodeByType($type, $offset, $size)
{
    if ($type == 'map') {
        return $this->decodeMap($size, $offset);
    }
    if ($type == 'array') {
        return $this->decodeArray($size, $offset);
    }
    if ($type == 'boolean') {
        return array($this->decodeBoolean($size), $offset);
    }

    $bytes = Util::read($this->fileStream, $offset, $size);
    $newOffset = $offset + $size;

    switch ($type) {
        case 'utf8_string':
            $value = $this->decodeString($bytes);
            break;
        case 'double':
            $this->verifySize(8, $size);
            $value = $this->decodeDouble($bytes);
            break;
        case 'float':
            $this->verifySize(4, $size);
            $value = $this->decodeFloat($bytes);
            break;
        case 'bytes':
            $value = $bytes;
            break;
        case 'uint16':
        case 'uint32':
            $value = $this->decodeUint($bytes);
            break;
        case 'int32':
            $value = $this->decodeInt32($bytes);
            break;
        case 'uint64':
        case 'uint128':
            $value = $this->decodeBigUint($bytes, $size);
            break;
        default:
            throw new InvalidDatabaseException(
                "Unknown or unexpected type: " . $type
            );
    }

    return array($value, $newOffset);
}
||
| 139 | |||
| 140 | private function verifySize($expected, $actual) |
||
| 145 | ); |
||
| 146 | } |
||
| 147 | } |
||
| 148 | |||
/**
 * Decodes $size consecutive fields into a list.
 *
 * @param int $size   number of elements
 * @param int $offset position of the first element
 *
 * @return array array($elements, $nextOffset)
 */
private function decodeArray($size, $offset)
{
    $elements = array();

    for ($index = 0; $index < $size; ++$index) {
        // Each decode() call reports where the next element begins.
        list($element, $offset) = $this->decode($offset);
        $elements[] = $element;
    }

    return array($elements, $offset);
}
||
| 160 | |||
/**
 * Interprets the size as the boolean value itself.
 *
 * @param int $size size decoded from the control byte
 *
 * @return bool false when $size equals 0, true otherwise
 */
private function decodeBoolean($size)
{
    return $size != 0;
}
||
| 165 | |||
/**
 * Unpacks a double from its raw bytes.
 *
 * XXX - Assumes IEEE 754 double on platform. The 'd' format uses machine
 * byte order, so the bytes go through maybeSwitchByteOrder() first.
 *
 * @param string $bits raw bytes of the value
 *
 * @return float
 */
private function decodeDouble($bits)
{
    $machineOrdered = $this->maybeSwitchByteOrder($bits);
    list(, $value) = unpack('d', $machineOrdered);

    return $value;
}
||
| 172 | |||
/**
 * Unpacks a float from its raw bytes.
 *
 * XXX - Assumes IEEE 754 floats on platform. The 'f' format uses machine
 * byte order, so the bytes go through maybeSwitchByteOrder() first.
 *
 * @param string $bits raw bytes of the value
 *
 * @return float
 */
private function decodeFloat($bits)
{
    $machineOrdered = $this->maybeSwitchByteOrder($bits);
    list(, $value) = unpack('f', $machineOrdered);

    return $value;
}
||
| 179 | |||
/**
 * Unpacks a signed 32-bit integer.
 *
 * The payload is left-padded with zero bytes to four bytes, passed through
 * maybeSwitchByteOrder(), and unpacked with the machine-order 'l' format.
 *
 * @param string $bytes raw bytes of the value
 *
 * @return int
 */
private function decodeInt32($bytes)
{
    $fourBytes = $this->zeroPadLeft($bytes, 4);
    $machineOrdered = $this->maybeSwitchByteOrder($fourBytes);
    list(, $value) = unpack('l', $machineOrdered);

    return $value;
}
||
| 186 | |||
/**
 * Decodes $size key/value pairs into an associative array.
 *
 * Each pair is stored as two consecutive fields: the key, then the value.
 *
 * @param int $size   number of pairs
 * @param int $offset position of the first key
 *
 * @return array array($map, $nextOffset)
 */
private function decodeMap($size, $offset)
{
    $entries = array();

    for ($pair = 0; $pair < $size; ++$pair) {
        list($name, $offset) = $this->decode($offset);
        list($entry, $offset) = $this->decode($offset);
        $entries[$name] = $entry;
    }

    return array($entries, $offset);
}
||
| 200 | |||
/**
 * Constant added to a decoded pointer, keyed by the pointer size in bytes
 * (1-4) that decodePointer() derives from the control byte.
 *
 * 2048 is 2^11 and 526336 is 2^19 + 2^11.
 */
private $pointerValueOffset = array(
    1 => 0,
    2 => 2048,
    3 => 526336,
    4 => 0,
);
||
| 207 | |||
/**
 * Decodes a pointer field.
 *
 * @param int $ctrlByte control byte of the pointer field
 * @param int $offset   position of the first byte after the control byte
 *
 * @return array array($pointer, $nextOffset)
 */
private function decodePointer($ctrlByte, $offset)
{
    // Bits 3 and 4 of the control byte hold the pointer size minus one.
    $pointerSize = (($ctrlByte >> 3) & 0x3) + 1;

    $raw = Util::read($this->fileStream, $offset, $pointerSize);

    if ($pointerSize == 4) {
        // Four-byte pointers use only the bytes that were read.
        $packed = $raw;
    } else {
        // Shorter pointers take the low three bits of the control byte as
        // their most significant byte.
        $packed = pack('C', $ctrlByte & 0x7) . $raw;
    }

    $pointer = $this->decodeUint($packed)
        + $this->pointerBase
        + $this->pointerValueOffset[$pointerSize];

    return array($pointer, $offset + $pointerSize);
}
||
| 225 | |||
/**
 * Unpacks a big-endian unsigned integer.
 *
 * The input is left-padded with zero bytes to four bytes and unpacked with
 * the 'N' format (unsigned 32-bit, big-endian).
 *
 * @param string $bytes raw bytes of the value
 *
 * @return int
 */
private function decodeUint($bytes)
{
    $fourBytes = $this->zeroPadLeft($bytes, 4);
    list(, $value) = unpack('N', $fourBytes);

    return $value;
}
||
| 231 | |||
/**
 * Decodes an unsigned integer of arbitrary byte length.
 *
 * The bytes are split into big-endian 32-bit words and combined from the
 * most significant word down. Native integer arithmetic is used when
 * $byteLength is within the native integer width; otherwise gmp or bcmath
 * performs the arithmetic on decimal strings.
 *
 * @param string $bytes      raw bytes of the value
 * @param int    $byteLength number of bytes in $bytes
 *
 * @return int|string a native integer, or a decimal string when gmp or
 *                    bcmath had to be used
 *
 * @throws \RuntimeException when the value needs big-number arithmetic and
 *                           neither gmp nor bcmath is loaded
 */
private function decodeBigUint($bytes, $byteLength)
{
    if ($byteLength == 0) {
        return 0;
    }

    // We only use gmp or bcmath if the final value is too big
    $fitsNativeInt = $byteLength <= log(PHP_INT_MAX, 2) / 8;

    // Pad to a whole number of 32-bit words so unpack() can consume it.
    $wordCount = ceil($byteLength / 4);
    $padded = $this->zeroPadLeft($bytes, $wordCount * 4);
    $words = array_values(unpack("N$wordCount", $padded));

    // 2^32
    $wordBase = '4294967296';

    $total = 0;

    foreach ($words as $word) {
        if ($fitsNativeInt) {
            $total = ($total << 32) + $word;
        } elseif (extension_loaded('gmp')) {
            $total = gmp_strval(gmp_add(gmp_mul($total, $wordBase), $word));
        } elseif (extension_loaded('bcmath')) {
            $total = bcadd(bcmul($total, $wordBase), $word);
        } else {
            throw new \RuntimeException(
                'The gmp or bcmath extension must be installed to read this database.'
            );
        }
    }

    return $total;
}
||
| 266 | |||
/**
 * Returns the string payload unchanged.
 *
 * XXX - NOOP. As far as I know, the end user has to explicitly set the
 * encoding in PHP. Strings are just bytes.
 *
 * @param string $bytes raw bytes of the string
 *
 * @return string the same bytes
 */
private function decodeString($bytes)
{
    $string = $bytes;

    return $string;
}
||
| 273 | |||
/**
 * Determines the payload size of a field from its control byte.
 *
 * The low five bits of the control byte hold a size marker:
 *  - below 29: the marker is the size and no extra bytes are read;
 *  - 29: size is 29 plus the next byte;
 *  - 30: size is 285 plus the next two bytes;
 *  - 31: size is 65821 plus the next three bytes.
 * Extra bytes are read as one big-endian unsigned integer.
 *
 * @param int $ctrlByte control byte of the field
 * @param int $offset   position of the first byte after the type bytes
 *
 * @return array array($size, $nextOffset), with $nextOffset advanced past
 *               any extra size bytes
 */
private function sizeFromCtrlByte($ctrlByte, $offset)
{
    $size = $ctrlByte & 0x1f;
    $bytesToRead = $size < 29 ? 0 : $size - 28;
    $bytes = Util::read($this->fileStream, $offset, $bytesToRead);
    $decoded = $this->decodeUint($bytes);

    if ($size == 29) {
        $size = 29 + $decoded;
    } elseif ($size == 30) {
        $size = 285 + $decoded;
    } elseif ($size > 30) {
        // All 24 bits of the three size bytes are significant. The former
        // mask (0x0FFFFFFF >> 8 == 0xFFFFF) kept only 20 of them and so
        // corrupted sizes of 65821 + 2^20 and above.
        $size = 65821 + $decoded;
    }

    return array($size, $offset + $bytesToRead);
}
||
| 292 | |||
/**
 * Left-pads $content with NUL bytes up to $desiredLength bytes.
 *
 * Content that is already at least that long is returned unchanged.
 *
 * @param string $content       bytes to pad
 * @param int    $desiredLength minimum length of the result
 *
 * @return string
 */
private function zeroPadLeft($content, $desiredLength)
{
    $padByte = "\x00";

    return str_pad($content, $desiredLength, $padByte, STR_PAD_LEFT);
}
||
| 297 | |||
| 298 | private function maybeSwitchByteOrder($bytes) |
||
| 301 | } |
||
| 302 | |||
/**
 * Reports whether the platform stores 16-bit integers little-endian.
 *
 * A probe value is packed in machine byte order ('S') and read back as
 * little-endian ('v'); the two agree only on a little-endian platform.
 *
 * @return bool
 */
private function isPlatformLittleEndian()
{
    $probe = 0x00FF;
    $machineOrdered = pack('S', $probe);
    list(, $readAsLittleEndian) = unpack('v', $machineOrdered);

    return $readAsLittleEndian === $probe;
}
||
| 309 | } |
||
| 310 |