Total Complexity | 49 |
Total Lines | 300 |
Duplicated Lines | 0 % |
Changes | 1 | ||
Bugs | 0 | Features | 0 |
Complex classes like Decoder often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Decoder, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
8 | class Decoder |
||
9 | { |
||
10 | |||
/**
 * Stream handle handed to Util::read() for every read.
 */
private $fileStream;

/**
 * Constant added to every decoded pointer value.
 */
private $pointerBase;

/**
 * Only used for unit testing: when truthy, decode() returns the raw
 * pointer value instead of following it.
 */
private $pointerTestHack;

/**
 * Result of isPlatformLittleEndian(), captured in the constructor.
 */
private $switchByteOrder;

/**
 * Type names indexed by type number.
 */
private $types = array(
    0  => 'extended',
    1  => 'pointer',
    2  => 'utf8_string',
    3  => 'double',
    4  => 'bytes',
    5  => 'uint16',
    6  => 'uint32',
    7  => 'map',
    8  => 'int32',
    9  => 'uint64',
    10 => 'uint128',
    11 => 'array',
    12 => 'container',
    13 => 'end_marker',
    14 => 'boolean',
    15 => 'float',
);
||
35 | |||
/**
 * Creates a decoder that reads from the given stream.
 *
 * @param mixed $fileStream      handle passed through to Util::read()
 * @param int   $pointerBase     value added to every decoded pointer
 * @param bool  $pointerTestHack unit-test switch; when truthy, decode()
 *                               returns pointers without following them
 */
public function __construct($fileStream, $pointerBase = 0, $pointerTestHack = false)
{
    // Detect the host byte order once so it is available to later decoding.
    $this->switchByteOrder = $this->isPlatformLittleEndian();

    $this->pointerTestHack = $pointerTestHack;
    $this->pointerBase = $pointerBase;
    $this->fileStream = $fileStream;
}
||
47 | |||
48 | |||
/**
 * Decodes the data field whose control byte sits at the given offset.
 *
 * @param int $offset position of the field's control byte
 *
 * @return array array($value, $nextOffset); when the pointer test hack is
 *               enabled and the field is a pointer, only array($pointer)
 *
 * @throws InvalidDatabaseException if an extended type byte resolves to a
 *                                  type number below 8 or to a number that
 *                                  has no entry in the type table
 */
public function decode($offset)
{
    list(, $ctrlByte) = unpack(
        'C',
        Util::read($this->fileStream, $offset, 1)
    );
    $offset++;

    // The top three bits of the control byte select the type.
    $type = $this->types[$ctrlByte >> 5];

    // Pointers are a special case, we don't read the next $size bytes, we
    // use the size to determine the length of the pointer and then follow
    // it.
    if ($type === 'pointer') {
        list($pointer, $offset) = $this->decodePointer($ctrlByte, $offset);

        // for unit testing
        if ($this->pointerTestHack) {
            return array($pointer);
        }

        // Only the value at the pointer target is kept; decoding resumes
        // right after the pointer itself, not after its target.
        list($result) = $this->decode($pointer);

        return array($result, $offset);
    }

    if ($type === 'extended') {
        list(, $nextByte) = unpack(
            'C',
            Util::read($this->fileStream, $offset, 1)
        );

        $typeNum = $nextByte + 7;

        if ($typeNum < 8) {
            throw new InvalidDatabaseException(
                "Something went horribly wrong in the decoder. An extended type "
                . "resolved to a type number < 8 ("
                . $this->types[$typeNum]
                . ")"
            );
        }

        // $nextByte can be as large as 255, so the type number may fall
        // outside the type table. Reject it here rather than indexing an
        // undefined entry and carrying on with a null type.
        if (!isset($this->types[$typeNum])) {
            throw new InvalidDatabaseException(
                "Unknown or unexpected type: " . $typeNum
            );
        }

        $type = $this->types[$typeNum];
        $offset++;
    }

    list($size, $offset) = $this->sizeFromCtrlByte($ctrlByte, $offset);

    return $this->decodeByType($type, $offset, $size);
}
||
100 | |||
/**
 * Dispatches to the decoder for the given type name.
 *
 * Maps, arrays and booleans are handled from $size alone; every other
 * type first reads $size bytes at $offset and decodes that payload.
 *
 * @param string $type   type name taken from the type table
 * @param int    $offset position of the first payload byte
 * @param int    $size   size value decoded from the control byte
 *
 * @return array array($value, $nextOffset)
 *
 * @throws InvalidDatabaseException if the type has no decoder here
 */
private function decodeByType($type, $offset, $size)
{
    // These types do not consume a $size-byte payload of their own.
    if ($type == 'map') {
        return $this->decodeMap($size, $offset);
    }
    if ($type == 'array') {
        return $this->decodeArray($size, $offset);
    }
    if ($type == 'boolean') {
        return array($this->decodeBoolean($size), $offset);
    }

    $nextOffset = $offset + $size;
    $payload = Util::read($this->fileStream, $offset, $size);

    switch ($type) {
        case 'utf8_string':
            $value = $this->decodeString($payload);
            break;
        case 'double':
            $this->verifySize(8, $size);
            $value = $this->decodeDouble($payload);
            break;
        case 'float':
            $this->verifySize(4, $size);
            $value = $this->decodeFloat($payload);
            break;
        case 'bytes':
            $value = $payload;
            break;
        case 'uint16':
        case 'uint32':
            $value = $this->decodeUint($payload);
            break;
        case 'int32':
            $value = $this->decodeInt32($payload);
            break;
        case 'uint64':
        case 'uint128':
            $value = $this->decodeBigUint($payload, $size);
            break;
        default:
            throw new InvalidDatabaseException(
                "Unknown or unexpected type: " . $type
            );
    }

    return array($value, $nextOffset);
}
||
139 | |||
140 | private function verifySize($expected, $actual) |
||
145 | ); |
||
146 | } |
||
147 | } |
||
148 | |||
/**
 * Decodes $size consecutive fields starting at $offset into a list.
 *
 * @param int $size   number of elements to decode
 * @param int $offset position of the first element
 *
 * @return array array($elements, $nextOffset)
 */
private function decodeArray($size, $offset)
{
    $elements = array();

    for ($index = 0; $index < $size; $index++) {
        // Each decode() call reports where the following element starts.
        list($element, $offset) = $this->decode($offset);
        $elements[] = $element;
    }

    return array($elements, $offset);
}
||
160 | |||
/**
 * Converts a size value into a boolean: zero is false, anything else true.
 *
 * @param int $size size value decoded from the control byte
 *
 * @return bool
 */
private function decodeBoolean($size)
{
    if ($size == 0) {
        return false;
    }

    return true;
}
||
165 | |||
/**
 * Unpacks a double from raw bytes using the machine-dependent 'd' format.
 *
 * Assumes the platform represents doubles as IEEE 754.
 *
 * @param string $bits raw payload bytes
 *
 * @return float
 */
private function decodeDouble($bits)
{
    $ordered = $this->maybeSwitchByteOrder($bits);
    list(, $value) = unpack('d', $ordered);

    return $value;
}
||
172 | |||
/**
 * Unpacks a float from raw bytes using the machine-dependent 'f' format.
 *
 * Assumes the platform represents floats as IEEE 754.
 *
 * @param string $bits raw payload bytes
 *
 * @return float
 */
private function decodeFloat($bits)
{
    $ordered = $this->maybeSwitchByteOrder($bits);
    list(, $value) = unpack('f', $ordered);

    return $value;
}
||
179 | |||
/**
 * Decodes a signed 32-bit integer from up to four payload bytes.
 *
 * @param string $bytes raw payload bytes
 *
 * @return int
 */
private function decodeInt32($bytes)
{
    // Left-pad short payloads to four bytes before unpacking.
    $padded = $this->zeroPadLeft($bytes, 4);
    $ordered = $this->maybeSwitchByteOrder($padded);
    list(, $value) = unpack('l', $ordered);

    return $value;
}
||
186 | |||
/**
 * Decodes $size key/value pairs starting at $offset into an array.
 *
 * @param int $size   number of key/value pairs
 * @param int $offset position of the first key
 *
 * @return array array($map, $nextOffset)
 */
private function decodeMap($size, $offset)
{
    $entries = array();

    for ($pair = 0; $pair < $size; $pair++) {
        // A key is immediately followed by its value.
        list($name, $offset) = $this->decode($offset);
        list($entry, $offset) = $this->decode($offset);
        $entries[$name] = $entry;
    }

    return array($entries, $offset);
}
||
200 | |||
/**
 * Constant added to a decoded pointer, indexed by the pointer's length
 * in bytes.
 */
private $pointerValueOffset = array(
    1 => 0,
    2 => 2048,
    3 => 526336,
    4 => 0,
);

/**
 * Reads the pointer that follows a pointer control byte.
 *
 * @param int $ctrlByte control byte of the pointer field
 * @param int $offset   position of the first byte after the control byte
 *
 * @return array array($pointer, $nextOffset)
 */
private function decodePointer($ctrlByte, $offset)
{
    // Bits 3-4 of the control byte give the pointer length minus one.
    $length = (($ctrlByte >> 3) & 0x3) + 1;
    $payload = Util::read($this->fileStream, $offset, $length);

    if ($length == 4) {
        // Four-byte pointers take their whole value from the payload.
        $raw = $payload;
    } else {
        // Shorter pointers use the low three control bits as the leading byte.
        $raw = pack('C', $ctrlByte & 0x7) . $payload;
    }

    $target = $this->decodeUint($raw)
        + $this->pointerBase
        + $this->pointerValueOffset[$length];

    return array($target, $offset + $length);
}
||
225 | |||
/**
 * Decodes a big-endian unsigned integer of up to four bytes.
 *
 * @param string $bytes raw bytes; an empty string decodes to 0
 *
 * @return int
 */
private function decodeUint($bytes)
{
    // 'N' needs exactly four bytes, so shorter input is left-padded.
    $padded = $this->zeroPadLeft($bytes, 4);
    list(, $value) = unpack('N', $padded);

    return $value;
}
||
231 | |||
/**
 * Decodes a big-endian unsigned integer of arbitrary byte length.
 *
 * Values that fit in a native integer are returned as int. Larger values
 * are accumulated with gmp or bcmath and returned as a decimal string.
 *
 * @param string $bytes      raw payload bytes
 * @param int    $byteLength number of payload bytes
 *
 * @return int|string
 *
 * @throws \RuntimeException if the value needs big-number support and
 *                           neither gmp nor bcmath is loaded
 */
private function decodeBigUint($bytes, $byteLength)
{
    if ($byteLength == 0) {
        return 0;
    }

    // Largest byte count that is handled with native integer arithmetic.
    $nativeByteLimit = log(PHP_INT_MAX, 2) / 8;

    // Split the payload into 32-bit words, left-padding the first one.
    $wordCount = ceil($byteLength / 4);
    $padded = $this->zeroPadLeft($bytes, $wordCount * 4);
    $words = array_merge(unpack("N$wordCount", $padded));

    // 2^32
    $base = '4294967296';
    $total = 0;

    if ($byteLength <= $nativeByteLimit) {
        foreach ($words as $word) {
            $total = ($total << 32) + $word;
        }

        return $total;
    }

    // We only use gmp or bcmath if the final value is too big
    if (extension_loaded('gmp')) {
        foreach ($words as $word) {
            $total = gmp_strval(gmp_add(gmp_mul($total, $base), $word));
        }

        return $total;
    }

    if (extension_loaded('bcmath')) {
        foreach ($words as $word) {
            $total = bcadd(bcmul($total, $base), $word);
        }

        return $total;
    }

    throw new \RuntimeException(
        'The gmp or bcmath extension must be installed to read this database.'
    );
}
||
266 | |||
/**
 * Returns the payload bytes unchanged.
 *
 * @param string $bytes raw payload bytes
 *
 * @return string
 */
private function decodeString($bytes)
{
    // Deliberate no-op: PHP strings are plain byte sequences, and choosing
    // an encoding is left to the end user.
    $string = $bytes;

    return $string;
}
||
273 | |||
/**
 * Works out the payload size encoded in a control byte.
 *
 * The low five bits of the control byte hold the size directly when they
 * are below 29. The values 29, 30 and 31 mean that one, two or three
 * further bytes follow, holding an unsigned integer that is added to
 * 29, 285 or 65821 respectively.
 *
 * @param int $ctrlByte control byte of the field
 * @param int $offset   position of the first byte after the type byte(s)
 *
 * @return array array($size, $nextOffset), where $nextOffset is the
 *               position just past any extra size bytes
 */
private function sizeFromCtrlByte($ctrlByte, $offset)
{
    $size = $ctrlByte & 0x1f;

    // Small sizes are stored inline, so there is nothing further to read.
    if ($size < 29) {
        return array($size, $offset);
    }

    $bytesToRead = $size - 28;
    $bytes = Util::read($this->fileStream, $offset, $bytesToRead);
    $decoded = $this->decodeUint($bytes);

    if ($size == 29) {
        $size = 29 + $decoded;
    } elseif ($size == 30) {
        $size = 285 + $decoded;
    } else {
        // All three extra bytes count towards the size. The previous mask
        // (0x0FFFFFFF >> 8) dropped the top four of those 24 bits.
        $size = 65821 + $decoded;
    }

    return array($size, $offset + $bytesToRead);
}
||
292 | |||
/**
 * Left-pads a byte string with NUL bytes up to the requested length.
 *
 * Input that is already at least $desiredLength bytes long is returned
 * unchanged.
 *
 * @param string $content       bytes to pad
 * @param int    $desiredLength minimum length of the result
 *
 * @return string
 */
private function zeroPadLeft($content, $desiredLength)
{
    $padded = str_pad($content, $desiredLength, "\x00", STR_PAD_LEFT);

    return $padded;
}
||
297 | |||
298 | private function maybeSwitchByteOrder($bytes) |
||
301 | } |
||
302 | |||
/**
 * Reports whether the host stores integers in little-endian byte order.
 *
 * @return bool
 */
private function isPlatformLittleEndian()
{
    $probe = 0x00FF;

    // Pack in machine byte order ('S'), then read back as little-endian
    // ('v'). The value only survives the round trip on a little-endian host.
    $roundTrip = unpack('v', pack('S', $probe));

    return current($roundTrip) === $probe;
}
||
309 | } |
||
310 |