| Total Complexity | 58 |
| Total Lines | 340 |
| Duplicated Lines | 0 % |
| Changes | 1 | ||
| Bugs | 0 | Features | 0 |
Complex classes like Decoder often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields and methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Decoder, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 15 | class Decoder |
||
| 16 | { |
||
/** Stream handle passed to Util::read() for every read this decoder performs. */
private $fileStream;
/** Value added to every pointer decoded by decodePointer(). */
private $pointerBase;
/**
 * log2($pointerBase) / 8 (or 0 when the base is not positive); decodePointer()
 * adds it to the pointer size to decide whether pointer + base fits a native int.
 */
private $pointerBaseByteSize;
// This is only used for unit testing: when truthy, decode() returns the raw
// pointer value in a one-element array instead of following it.
private $pointerTestHack;
/** Result of isPlatformLittleEndian(), computed once in the constructor. */
private $switchByteOrder;

// Type numbers. decode() takes the type from the top three bits of the
// control byte; when that yields _EXTENDED, the real type is the following
// byte plus 7 (so extended types start at 8).

/** @ignore */
const _EXTENDED = 0;
/** @ignore */
const _POINTER = 1;
/** @ignore */
const _UTF8_STRING = 2;
/** @ignore */
const _DOUBLE = 3;
/** @ignore */
const _BYTES = 4;
/** @ignore */
const _UINT16 = 5;
/** @ignore */
const _UINT32 = 6;
/** @ignore */
const _MAP = 7;
/** @ignore */
const _INT32 = 8;
/** @ignore */
const _UINT64 = 9;
/** @ignore */
const _UINT128 = 10;
/** @ignore */
const _ARRAY = 11;
/** @ignore */
const _CONTAINER = 12;
/** @ignore */
const _END_MARKER = 13;
/** @ignore */
const _BOOLEAN = 14;
/** @ignore */
const _FLOAT = 15;
||
| 56 | |||
/**
 * Stores the stream and pointer settings and detects the platform byte order.
 *
 * @param mixed $fileStream      handle later passed to Util::read()
 * @param int   $pointerBase     value added to every decoded pointer
 * @param bool  $pointerTestHack when truthy, decode() returns raw pointers
 *                               instead of following them (unit tests only)
 */
public function __construct(
    $fileStream,
    $pointerBase = 0,
    $pointerTestHack = false
) {
    $this->fileStream = $fileStream;
    $this->pointerBase = $pointerBase;
    $this->pointerTestHack = $pointerTestHack;

    // Number of bytes needed to represent the base; used later to decide
    // whether pointer + base still fits into a native integer.
    if ($pointerBase > 0) {
        $this->pointerBaseByteSize = log($pointerBase, 2) / 8;
    } else {
        $this->pointerBaseByteSize = 0;
    }

    $this->switchByteOrder = $this->isPlatformLittleEndian();
}
||
| 70 | |||
/**
 * Decodes the data field that starts at $offset.
 *
 * @param int $offset position of the field's control byte
 *
 * @throws InvalidDatabaseException when an extended type resolves to a
 *                                  type number below 8
 *
 * @return array [value, offset just past the field]; in pointer test mode a
 *               pointer field yields the one-element array [pointer]
 */
public function decode($offset)
{
    $controlByte = \ord(Util::read($this->fileStream, $offset, 1));
    $cursor = $offset;
    ++$cursor;

    // The type lives in the top three bits of the control byte.
    $typeNumber = $controlByte >> 5;

    // A pointer does not carry a payload of $size bytes: its size bits give
    // the pointer length, and the value is found by following the pointer.
    if ($typeNumber === self::_POINTER) {
        list($target, $cursor) = $this->decodePointer($controlByte, $cursor);

        // for unit testing
        if ($this->pointerTestHack) {
            return [$target];
        }

        // Only the pointed-to value is kept; reading resumes after the
        // pointer itself, not after the pointed-to data.
        list($resolved) = $this->decode($target);

        return [$resolved, $cursor];
    }

    if ($typeNumber === self::_EXTENDED) {
        // Extended types store (type - 7) in the byte after the control byte.
        $typeNumber = \ord(Util::read($this->fileStream, $cursor, 1)) + 7;

        if ($typeNumber < 8) {
            throw new InvalidDatabaseException(
                'Something went horribly wrong in the decoder. An extended type '
                . 'resolved to a type number < 8 ('
                . $typeNumber
                . ')'
            );
        }

        ++$cursor;
    }

    list($size, $cursor) = $this->sizeFromCtrlByte($controlByte, $cursor);

    return $this->decodeByType($typeNumber, $cursor, $size);
}
||
| 115 | |||
/**
 * Decodes a value of a known type whose payload starts at $offset.
 *
 * @param int $type   one of the type constants of this class
 * @param int $offset position of the first payload byte
 * @param int $size   size taken from the control byte (entry count for maps
 *                    and arrays, the value itself for booleans, otherwise
 *                    the payload length in bytes)
 *
 * @throws InvalidDatabaseException for a type this method does not handle
 *
 * @return array [value, offset just past the value]
 */
private function decodeByType($type, $offset, $size)
{
    // These types do not read $size raw bytes, so they are handled before
    // any payload is fetched.
    if ($type === self::_MAP) {
        return $this->decodeMap($size, $offset);
    }
    if ($type === self::_ARRAY) {
        return $this->decodeArray($size, $offset);
    }
    if ($type === self::_BOOLEAN) {
        return [$this->decodeBoolean($size), $offset];
    }

    $payload = Util::read($this->fileStream, $offset, $size);
    $end = $offset + $size;

    switch ($type) {
        case self::_BYTES:
        case self::_UTF8_STRING:
            $value = $payload;
            break;
        case self::_DOUBLE:
            $this->verifySize(8, $size);
            $value = $this->decodeDouble($payload);
            break;
        case self::_FLOAT:
            $this->verifySize(4, $size);
            $value = $this->decodeFloat($payload);
            break;
        case self::_INT32:
            $value = $this->decodeInt32($payload, $size);
            break;
        case self::_UINT16:
        case self::_UINT32:
        case self::_UINT64:
        case self::_UINT128:
            $value = $this->decodeUint($payload, $size);
            break;
        default:
            throw new InvalidDatabaseException(
                'Unknown or unexpected type: ' . $type
            );
    }

    return [$value, $end];
}
||
| 154 | |||
| 155 | private function verifySize($expected, $actual) |
||
| 160 | ); |
||
| 161 | } |
||
| 162 | } |
||
| 163 | |||
/**
 * Decodes $size consecutive values starting at $offset into a list.
 *
 * @param int $size   number of elements
 * @param int $offset position of the first element
 *
 * @return array [list of decoded values, offset just past the last element]
 */
private function decodeArray($size, $offset)
{
    $elements = [];

    for ($remaining = $size; $remaining > 0; --$remaining) {
        // Each decode() call reports where the next element begins.
        list($element, $offset) = $this->decode($offset);
        $elements[] = $element;
    }

    return [$elements, $offset];
}
||
| 175 | |||
/**
 * Interprets a size value as a boolean.
 *
 * @param int $size size taken from the control byte
 *
 * @return bool false only when $size is exactly the integer 0
 */
private function decodeBoolean($size)
{
    return $size !== 0;
}
||
| 180 | |||
/**
 * Unpacks a double from its raw bytes.
 *
 * @param string $bits raw bytes of the value
 *
 * @return float
 */
private function decodeDouble($bits)
{
    // IEEE 754 doubles are assumed, which most (all?) modern platforms use.
    //
    // The "E" unpack format is avoided because it only exists since PHP
    // 7.0.15 / 7.1.1. The machine-order "d" format is used instead, so the
    // byte order must be switched on little endian machines.
    $machineOrdered = $this->maybeSwitchByteOrder($bits);
    $fields = unpack('d', $machineOrdered);

    // unpack() numbers unnamed fields starting at 1.
    return $fields[1];
}
||
| 193 | |||
/**
 * Unpacks a single-precision float from its raw bytes.
 *
 * @param string $bits raw bytes of the value
 *
 * @return float
 */
private function decodeFloat($bits)
{
    // IEEE 754 floats are assumed, which most (all?) modern platforms use.
    //
    // The "G" unpack format is avoided because it only exists since PHP
    // 7.0.15 / 7.1.1. The machine-order "f" format is used instead, so the
    // byte order must be switched on little endian machines.
    $machineOrdered = $this->maybeSwitchByteOrder($bits);
    $fields = unpack('f', $machineOrdered);

    // unpack() numbers unnamed fields starting at 1.
    return $fields[1];
}
||
| 206 | |||
/**
 * Decodes a signed 32-bit integer stored in 0 to 4 bytes.
 *
 * @param string $bytes raw bytes of the value
 * @param int    $size  number of bytes in $bytes
 *
 * @throws InvalidDatabaseException when $size is not between 0 and 4
 *
 * @return int
 */
private function decodeInt32($bytes, $size)
{
    // A zero-length integer is the value 0.
    if ($size === 0) {
        return 0;
    }

    if ($size < 1 || $size > 4) {
        throw new InvalidDatabaseException(
            "The MaxMind DB file's data section contains bad data (unknown data type or corrupt data)"
        );
    }

    // Shorter encodings are widened to four bytes with leading zero bytes.
    $fourBytes = $size < 4
        ? str_pad($bytes, 4, "\x00", STR_PAD_LEFT)
        : $bytes;

    $fields = unpack('l', $this->maybeSwitchByteOrder($fourBytes));

    // unpack() numbers unnamed fields starting at 1.
    return $fields[1];
}
||
| 229 | |||
/**
 * Decodes $size key/value pairs starting at $offset into an associative array.
 *
 * @param int $size   number of key/value pairs
 * @param int $offset position of the first key
 *
 * @return array [decoded map, offset just past the last value]
 */
private function decodeMap($size, $offset)
{
    $entries = [];

    for ($remaining = $size; $remaining > 0; --$remaining) {
        // Every pair is stored as a key immediately followed by its value.
        list($key, $offset) = $this->decode($offset);
        list($value, $offset) = $this->decode($offset);
        $entries[$key] = $value;
    }

    return [$entries, $offset];
}
||
| 242 | |||
/**
 * Decodes a pointer field.
 *
 * Bits 3-4 of the control byte give the pointer size (1 to 4 bytes). For
 * sizes 1 to 3 the low three bits of the control byte are the most
 * significant bits of the value; for size 4 they are not used.
 *
 * @param int $ctrlByte control byte of the pointer field
 * @param int $offset   position of the first byte after the control byte
 *
 * @throws RuntimeException when the pointer is too large for a native
 *                          integer and neither gmp nor bcmath is loaded
 *
 * @return array [pointer including the pointer base, offset just past the pointer]
 */
private function decodePointer($ctrlByte, $offset)
{
    $width = (($ctrlByte >> 3) & 0x3) + 1;

    $payload = Util::read($this->fileStream, $offset, $width);
    $next = $offset + $width;

    if ($width === 4) {
        // unpack() cannot be used here as it might overflow on 32 bit
        // machines.
        $rawPointer = $this->decodeUint($payload, $width);

        if ($width + $this->pointerBaseByteSize <= _MM_MAX_INT_BYTES) {
            $pointer = $rawPointer + $this->pointerBase;
        } elseif (\extension_loaded('gmp')) {
            $pointer = gmp_strval(gmp_add($rawPointer, $this->pointerBase));
        } elseif (\extension_loaded('bcmath')) {
            $pointer = bcadd($rawPointer, $this->pointerBase);
        } else {
            throw new RuntimeException(
                'The gmp or bcmath extension must be installed to read this database.'
            );
        }

        return [$pointer, $next];
    }

    $highBits = \chr($ctrlByte & 0x7);

    if ($width === 1) {
        list(, $value) = unpack('n', $highBits . $payload);
        $pointer = $value + $this->pointerBase;
    } elseif ($width === 2) {
        // Pad to four bytes so the value can be read as a 32-bit integer.
        list(, $value) = unpack('N', "\x00" . $highBits . $payload);
        $pointer = $value + ($this->pointerBase + 2048);
    } else {
        // 'N' is safe even on 32 bit machines: the first bit is always 0
        // because only three bits of the leading byte are set.
        list(, $value) = unpack('N', $highBits . $payload);
        $pointer = $value + ($this->pointerBase + 526336);
    }

    return [$pointer, $next];
}
||
| 291 | |||
/**
 * Decodes a big-endian unsigned integer of arbitrary byte length.
 *
 * The arithmetic back end (native ints, gmp or bcmath) depends only on
 * $byteLength, so it is chosen once instead of being re-checked for every
 * byte. A missing extension is therefore reported before any byte is read.
 *
 * @param string $bytes      raw bytes, most significant byte first
 * @param int    $byteLength number of bytes of $bytes to consume
 *
 * @throws RuntimeException when the value needs more than
 *                          _MM_MAX_INT_BYTES bytes and neither gmp nor
 *                          bcmath is loaded
 *
 * @return int|string a native int when $byteLength is at most
 *                    _MM_MAX_INT_BYTES, otherwise a decimal string
 */
private function decodeUint($bytes, $byteLength)
{
    if ($byteLength === 0) {
        return 0;
    }

    // gmp or bcmath is only needed if the final value is too big for a
    // native integer.
    if ($byteLength <= _MM_MAX_INT_BYTES) {
        $integer = 0;
        for ($i = 0; $i < $byteLength; ++$i) {
            $integer = ($integer << 8) + \ord($bytes[$i]);
        }

        return $integer;
    }

    if (\extension_loaded('gmp')) {
        $integer = 0;
        for ($i = 0; $i < $byteLength; ++$i) {
            $integer = gmp_strval(gmp_add(gmp_mul($integer, 256), \ord($bytes[$i])));
        }

        return $integer;
    }

    if (\extension_loaded('bcmath')) {
        // bcmath operates on numeric strings, so every operand is passed as
        // a string rather than relying on implicit int-to-string coercion.
        $integer = '0';
        for ($i = 0; $i < $byteLength; ++$i) {
            $integer = bcadd(bcmul($integer, '256'), (string) \ord($bytes[$i]));
        }

        return $integer;
    }

    throw new RuntimeException(
        'The gmp or bcmath extension must be installed to read this database.'
    );
}
||
| 319 | |||
/**
 * Works out the size of a field from its control byte.
 *
 * The low five bits hold the size directly when they are below 29. The
 * values 29, 30 and 31 mean the size continues in the next 1, 2 or 3 bytes
 * and is offset by 29, 285 or 65821 respectively.
 *
 * @param int $ctrlByte control byte of the field
 * @param int $offset   position of the first byte after the control byte
 *                      (and after the extended type byte, if any)
 *
 * @return array [size, offset just past any extra size bytes]
 */
private function sizeFromCtrlByte($ctrlByte, $offset)
{
    $size = $ctrlByte & 0x1f;

    if ($size < 29) {
        return [$size, $offset];
    }

    $extraLength = $size - 28;
    $extra = Util::read($this->fileStream, $offset, $extraLength);

    switch ($extraLength) {
        case 1:
            $size = 29 + \ord($extra);
            break;
        case 2:
            list(, $adjust) = unpack('n', $extra);
            $size = 285 + $adjust;
            break;
        case 3:
            // Pad the three bytes to four so they can be read as a 32-bit
            // big-endian integer.
            list(, $adjust) = unpack('N', "\x00" . $extra);
            $size = $adjust + 65821;
            break;
    }

    return [$size, $offset + $extraLength];
}
||
| 343 | |||
| 344 | private function maybeSwitchByteOrder($bytes) |
||
| 347 | } |
||
| 348 | |||
/**
 * Reports whether the platform stores integers in little endian byte order.
 *
 * A 16-bit value is packed in machine byte order and read back as little
 * endian; it only survives the round trip unchanged on a little endian
 * machine.
 *
 * @return bool
 */
private function isPlatformLittleEndian()
{
    $probe = 0x00FF;
    $fields = unpack('v', pack('S', $probe));

    // unpack() numbers unnamed fields starting at 1.
    return $fields[1] === $probe;
}
||
| 356 | } |
||
| 357 |