Total Complexity | 58 |
Total Lines | 340 |
Duplicated Lines | 0 % |
Changes | 1 | ||
Bugs | 0 | Features | 0 |
Complex classes like Decoder often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Decoder, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
15 | class Decoder |
||
16 | { |
||
17 | private $fileStream; |
||
18 | private $pointerBase; |
||
19 | private $pointerBaseByteSize; |
||
20 | // This is only used for unit testing |
||
21 | private $pointerTestHack; |
||
22 | private $switchByteOrder; |
||
23 | |||
24 | /** @ignore */ |
||
25 | const _EXTENDED = 0; |
||
26 | /** @ignore */ |
||
27 | const _POINTER = 1; |
||
28 | /** @ignore */ |
||
29 | const _UTF8_STRING = 2; |
||
30 | /** @ignore */ |
||
31 | const _DOUBLE = 3; |
||
32 | /** @ignore */ |
||
33 | const _BYTES = 4; |
||
34 | /** @ignore */ |
||
35 | const _UINT16 = 5; |
||
36 | /** @ignore */ |
||
37 | const _UINT32 = 6; |
||
38 | /** @ignore */ |
||
39 | const _MAP = 7; |
||
40 | /** @ignore */ |
||
41 | const _INT32 = 8; |
||
42 | /** @ignore */ |
||
43 | const _UINT64 = 9; |
||
44 | /** @ignore */ |
||
45 | const _UINT128 = 10; |
||
46 | /** @ignore */ |
||
47 | const _ARRAY = 11; |
||
48 | /** @ignore */ |
||
49 | const _CONTAINER = 12; |
||
50 | /** @ignore */ |
||
51 | const _END_MARKER = 13; |
||
52 | /** @ignore */ |
||
53 | const _BOOLEAN = 14; |
||
54 | /** @ignore */ |
||
55 | const _FLOAT = 15; |
||
56 | |||
/**
 * Creates a decoder that reads from the given stream.
 *
 * @param mixed $fileStream      handle handed to Util::read() for every read
 * @param int   $pointerBase     amount added to every pointer value decoded
 * @param bool  $pointerTestHack when true, decode() returns the raw pointer
 *                               instead of following it (unit testing only)
 */
public function __construct(
    $fileStream,
    $pointerBase = 0,
    $pointerTestHack = false
) {
    $this->fileStream = $fileStream;
    $this->pointerTestHack = $pointerTestHack;
    $this->pointerBase = $pointerBase;

    // log2(base) / 8: how many bytes the base occupies. decodePointer() adds
    // this to the pointer size to decide whether native integers suffice.
    if ($pointerBase > 0) {
        $this->pointerBaseByteSize = log($pointerBase, 2) / 8;
    } else {
        $this->pointerBaseByteSize = 0;
    }

    // Determined once here and stored for the lifetime of the decoder.
    $this->switchByteOrder = $this->isPlatformLittleEndian();
}
||
70 | |||
/**
 * Decodes the value whose control byte sits at $offset.
 *
 * @param int $offset position of the control byte in the stream
 *
 * @throws InvalidDatabaseException when an extended type resolves to a
 *                                  type number below 8
 *
 * @return array [decoded value, offset just past the value]; when the
 *               pointer test hack is enabled and a pointer is met, a
 *               single-element array holding only the pointer
 */
public function decode($offset)
{
    $controlByte = \ord(Util::read($this->fileStream, $offset, 1));
    $offset++;

    // The type lives in the top three bits of the control byte.
    $type = $controlByte >> 5;

    // A pointer is not followed by "size" bytes of payload; its size bits
    // give the pointer's own length, and the value is whatever it points at.
    if ($type === self::_POINTER) {
        list($target, $afterPointer) = $this->decodePointer($controlByte, $offset);

        // for unit testing
        if ($this->pointerTestHack) {
            return [$target];
        }

        $pointedTo = $this->decode($target);

        // Decoding resumes after the pointer itself, not after its target.
        return [$pointedTo[0], $afterPointer];
    }

    if ($type === self::_EXTENDED) {
        // Extended types store the real type number, minus 7, in the next byte.
        $extensionByte = \ord(Util::read($this->fileStream, $offset, 1));
        $type = 7 + $extensionByte;

        if ($type < 8) {
            throw new InvalidDatabaseException(
                'Something went horribly wrong in the decoder. An extended type '
                . 'resolved to a type number < 8 ('
                . $type
                . ')'
            );
        }

        $offset++;
    }

    $sizeAndOffset = $this->sizeFromCtrlByte($controlByte, $offset);
    $size = $sizeAndOffset[0];
    $offset = $sizeAndOffset[1];

    return $this->decodeByType($type, $offset, $size);
}
||
115 | |||
/**
 * Dispatches to the decoder for an already-resolved type.
 *
 * @param int $type   one of the self::_* type constants
 * @param int $offset offset of the first payload byte
 * @param int $size   size taken from the control byte
 *
 * @throws InvalidDatabaseException for a type with no decoder here
 *
 * @return array [decoded value, offset just past the value]
 */
private function decodeByType($type, $offset, $size)
{
    // Maps, arrays and booleans are handled before any payload is read:
    // for them $size is an entry count or the value itself.
    if ($type === self::_MAP) {
        return $this->decodeMap($size, $offset);
    }
    if ($type === self::_ARRAY) {
        return $this->decodeArray($size, $offset);
    }
    if ($type === self::_BOOLEAN) {
        return [$this->decodeBoolean($size), $offset];
    }

    // Everything else is followed by exactly $size payload bytes.
    $nextOffset = $offset + $size;
    $payload = Util::read($this->fileStream, $offset, $size);

    if ($type === self::_BYTES || $type === self::_UTF8_STRING) {
        return [$payload, $nextOffset];
    }

    if ($type === self::_DOUBLE) {
        $this->verifySize(8, $size);

        return [$this->decodeDouble($payload), $nextOffset];
    }

    if ($type === self::_FLOAT) {
        $this->verifySize(4, $size);

        return [$this->decodeFloat($payload), $nextOffset];
    }

    if ($type === self::_INT32) {
        return [$this->decodeInt32($payload, $size), $nextOffset];
    }

    $isUnsigned = $type === self::_UINT16
        || $type === self::_UINT32
        || $type === self::_UINT64
        || $type === self::_UINT128;
    if ($isUnsigned) {
        return [$this->decodeUint($payload, $size), $nextOffset];
    }

    throw new InvalidDatabaseException(
        'Unknown or unexpected type: ' . $type
    );
}
||
154 | |||
155 | private function verifySize($expected, $actual) |
||
160 | ); |
||
161 | } |
||
162 | } |
||
163 | |||
/**
 * Decodes $size consecutive values into a list.
 *
 * @param int $size   number of elements to decode
 * @param int $offset offset of the first element
 *
 * @return array [list of decoded values, offset just past the last element]
 */
private function decodeArray($size, $offset)
{
    $items = [];

    for ($remaining = $size; $remaining > 0; --$remaining) {
        // Each element reports where the next one begins.
        $decoded = $this->decode($offset);
        $items[] = $decoded[0];
        $offset = $decoded[1];
    }

    return [$items, $offset];
}
||
175 | |||
/**
 * Converts a boolean's size field into its value.
 *
 * @param int $size size taken from the control byte
 *
 * @return bool false only when $size is exactly integer 0
 */
private function decodeBoolean($size)
{
    return $size !== 0;
}
||
180 | |||
/**
 * Unpacks a double from its raw bytes.
 *
 * @param string $bits raw bytes of the value (decodeByType() verifies
 *                     the size is 8 before calling)
 *
 * @return float
 */
private function decodeDouble($bits)
{
    // Relies on the platform using IEEE 754 doubles, as most (all?)
    // modern platforms do.
    //
    // The "E" unpack format only exists from PHP 7.0.15 / 7.1.1 onwards,
    // so the machine-order "d" format is used and the byte order is
    // switched on little endian machines.
    $nativeOrder = $this->maybeSwitchByteOrder($bits);
    $unpacked = unpack('d', $nativeOrder);

    // unpack() numbers unnamed fields starting at 1.
    return $unpacked[1];
}
||
193 | |||
/**
 * Unpacks a float from its raw bytes.
 *
 * @param string $bits raw bytes of the value (decodeByType() verifies
 *                     the size is 4 before calling)
 *
 * @return float
 */
private function decodeFloat($bits)
{
    // Relies on the platform using IEEE 754 floats, as most (all?)
    // modern platforms do.
    //
    // The "G" unpack format only exists from PHP 7.0.15 / 7.1.1 onwards,
    // so the machine-order "f" format is used and the byte order is
    // switched on little endian machines.
    $nativeOrder = $this->maybeSwitchByteOrder($bits);
    $unpacked = unpack('f', $nativeOrder);

    // unpack() numbers unnamed fields starting at 1.
    return $unpacked[1];
}
||
206 | |||
/**
 * Decodes a signed 32-bit integer stored in zero to four bytes.
 *
 * @param string $bytes raw bytes of the value
 * @param int    $size  number of bytes in $bytes
 *
 * @throws InvalidDatabaseException when $size is outside 0..4
 *
 * @return int
 */
private function decodeInt32($bytes, $size)
{
    // An empty payload encodes zero.
    if ($size === 0) {
        return 0;
    }

    if ($size < 0 || $size > 4) {
        throw new InvalidDatabaseException(
            "The MaxMind DB file's data section contains bad data (unknown data type or corrupt data)"
        );
    }

    // Shorter encodings are widened to four bytes by prepending zero bytes.
    if ($size < 4) {
        $bytes = str_pad($bytes, 4, "\x00", STR_PAD_LEFT);
    }

    // "l" is a machine-order signed 32-bit integer, hence the byte-order step.
    $unpacked = unpack('l', $this->maybeSwitchByteOrder($bytes));

    return $unpacked[1];
}
||
229 | |||
/**
 * Decodes $size key/value pairs into an associative array.
 *
 * @param int $size   number of pairs to decode
 * @param int $offset offset of the first key
 *
 * @return array [associative array, offset just past the last value]
 */
private function decodeMap($size, $offset)
{
    $entries = [];

    for ($remaining = $size; $remaining > 0; --$remaining) {
        // The key comes first; its value follows immediately after.
        $decodedKey = $this->decode($offset);
        $decodedValue = $this->decode($decodedKey[1]);

        $entries[$decodedKey[0]] = $decodedValue[0];
        $offset = $decodedValue[1];
    }

    return [$entries, $offset];
}
||
242 | |||
/**
 * Decodes a pointer and resolves it against the pointer base.
 *
 * @param int $ctrlByte control byte that announced the pointer
 * @param int $offset   offset of the first byte after the control byte
 *
 * @throws RuntimeException when a 4-byte pointer needs big-number
 *                          arithmetic and neither gmp nor bcmath is loaded
 *
 * @return array [resolved pointer, offset just past the pointer bytes];
 *               the pointer is a numeric string when gmp/bcmath was used
 */
private function decodePointer($ctrlByte, $offset)
{
    // Bits 3-4 of the control byte hold the pointer length minus one.
    $pointerSize = 1 + (($ctrlByte >> 3) & 0x3);

    $buffer = Util::read($this->fileStream, $offset, $pointerSize);
    $offset += $pointerSize;

    if ($pointerSize === 1) {
        // The control byte's low three bits become the leading byte.
        $fields = unpack('n', \chr($ctrlByte & 0x7) . $buffer);
        $pointer = $fields[1] + $this->pointerBase;
    } elseif ($pointerSize === 2) {
        // Zero-padded up to the four bytes the 'N' format expects.
        $fields = unpack('N', "\x00" . \chr($ctrlByte & 0x7) . $buffer);
        $pointer = $fields[1] + ($this->pointerBase + 2048);
    } elseif ($pointerSize === 3) {
        // 'N' is safe here even on 32 bit machines because the first
        // bit is 0.
        $fields = unpack('N', \chr($ctrlByte & 0x7) . $buffer);
        $pointer = $fields[1] + ($this->pointerBase + 526336);
    } elseif ($pointerSize === 4) {
        // unpack is avoided here because the value might overflow on
        // 32 bit machines.
        $pointerOffset = $this->decodeUint($buffer, $pointerSize);

        $byteLength = $pointerSize + $this->pointerBaseByteSize;
        $fitsNativeInt = $byteLength <= _MM_MAX_INT_BYTES;

        if ($fitsNativeInt) {
            $pointer = $pointerOffset + $this->pointerBase;
        } elseif (\extension_loaded('gmp')) {
            $pointer = gmp_strval(gmp_add($pointerOffset, $this->pointerBase));
        } elseif (\extension_loaded('bcmath')) {
            $pointer = bcadd($pointerOffset, $this->pointerBase);
        } else {
            throw new RuntimeException(
                'The gmp or bcmath extension must be installed to read this database.'
            );
        }
    }

    return [$pointer, $offset];
}
||
291 | |||
/**
 * Decodes an unsigned integer, reading the most significant byte first.
 *
 * @param string $bytes      raw bytes of the value
 * @param int    $byteLength number of bytes to consume from $bytes
 *
 * @throws RuntimeException when the value needs big-number arithmetic
 *                          and neither gmp nor bcmath is loaded
 *
 * @return int|string native integer when $byteLength is at most
 *                    _MM_MAX_INT_BYTES, otherwise a numeric string
 */
private function decodeUint($bytes, $byteLength)
{
    if ($byteLength === 0) {
        return 0;
    }

    $value = 0;

    for ($index = 0; $index < $byteLength; ++$index) {
        $octet = \ord($bytes[$index]);

        // gmp or bcmath only come into play if the final value is too big
        if ($byteLength <= _MM_MAX_INT_BYTES) {
            $value = $octet + ($value << 8);
            continue;
        }

        if (\extension_loaded('gmp')) {
            $value = gmp_strval(gmp_add(gmp_mul($value, 256), $octet));
            continue;
        }

        if (\extension_loaded('bcmath')) {
            $value = bcadd(bcmul($value, 256), $octet);
            continue;
        }

        throw new RuntimeException(
            'The gmp or bcmath extension must be installed to read this database.'
        );
    }

    return $value;
}
||
319 | |||
/**
 * Works out a field's size from its control byte, reading extra size
 * bytes from the stream when the control byte calls for them.
 *
 * @param int $ctrlByte control byte of the field
 * @param int $offset   offset of the first byte after the control byte
 *                      (and after the extended-type byte, if there was one)
 *
 * @return array [size, offset just past any extra size bytes]
 */
private function sizeFromCtrlByte($ctrlByte, $offset)
{
    // The low five bits hold either the size itself (0-28) or a marker
    // (29-31) saying how many extra size bytes follow.
    $size = $ctrlByte & 0x1f;

    if ($size < 29) {
        return [$size, $offset];
    }

    // Markers 29, 30 and 31 mean 1, 2 and 3 extra bytes respectively.
    $extraLength = $size - 28;
    $extra = Util::read($this->fileStream, $offset, $extraLength);

    switch ($size) {
        case 29:
            $size = 29 + \ord($extra);
            break;
        case 30:
            $parts = unpack('n', $extra);
            $size = 285 + $parts[1];
            break;
        default:
            // Three bytes, zero-padded to the four that 'N' expects.
            $parts = unpack('N', "\x00" . $extra);
            $size = $parts[1] + 65821;
            break;
    }

    return [$size, $offset + $extraLength];
}
||
343 | |||
344 | private function maybeSwitchByteOrder($bytes) |
||
347 | } |
||
348 | |||
/**
 * Reports whether this machine stores integers in little endian order.
 *
 * @return bool true when a value packed in machine byte order reads
 *              back unchanged as little endian
 */
private function isPlatformLittleEndian()
{
    $probe = 0x00FF;

    // 'S' packs in machine byte order; 'v' always unpacks as little endian.
    $machineOrderBytes = pack('S', $probe);
    $readAsLittleEndian = unpack('v', $machineOrderBytes);

    return $probe === current($readAsLittleEndian);
}
||
356 | } |
||
357 |