Total Complexity | 70 |
Total Lines | 510 |
Duplicated Lines | 0 % |
Changes | 0 |
Complex classes like UnTgz often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use UnTgz, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
33 | class UnTgz |
||
34 | { |
||
35 | /** @var array Holds the return array of files processed */ |
||
36 | protected $return = []; |
||
37 | |||
38 | /** @var array Holds the data found in each tar file header block */ |
||
39 | protected $_current = []; |
||
40 | |||
41 | /** @var int Holds the file pointer, generally to the 512 block we are working on */ |
||
42 | protected $_offset = 0; |
||
43 | |||
44 | /** @var bool If the file passes or fails crc check */ |
||
45 | protected $_crc_check = false; |
||
46 | |||
47 | /** @var string|int The current crc value of the data */ |
||
48 | protected $_crc; |
||
49 | |||
50 | /** @var int The claimed size of the data in the tarball */ |
||
51 | protected $_size; |
||
52 | |||
53 | /** @var bool If we are going to write out the files processed */ |
||
54 | protected $_write_this = false; |
||
55 | |||
56 | /** @var bool If to skip a file we found */ |
||
57 | protected $_skip = false; |
||
58 | |||
59 | /** @var bool If we found a file that was requested ($files_to_extract) */ |
||
60 | protected $_found = false; |
||
61 | |||
62 | /** @var array|string Current file header we are working on */ |
||
63 | protected $_header = []; |
||
64 | |||
65 | /** @var null|string[] Array of file names we want to extract from the archive */ |
||
66 | protected $files_to_extract; |
||
67 | |||
68 | /** @var FileFunctions The file functions class */ |
||
69 | protected $fileFunc; |
||
70 | |||
71 | /** @var string Holds the data string passed to the function */ |
||
72 | protected $data; |
||
73 | |||
74 | /** @var string Location to write the files. */ |
||
75 | protected $destination; |
||
76 | |||
77 | /** @var bool|string If we are looking for a single specific file */ |
||
78 | protected $single_file; |
||
79 | |||
80 | /** @var bool If we can overwrite a file with the same name in the destination */ |
||
81 | protected $overwrite; |
||
82 | |||
83 | /** |
||
84 | * Class initialization, passes variables, loads dependencies |
||
85 | * |
||
86 | * @param string $data |
||
87 | * @param string $destination |
||
88 | * @param bool|string $single_file |
||
89 | * @param bool $overwrite |
||
90 | * @param null|string[] $files_to_extract |
||
91 | * |
||
92 | * @throws Exception package_no_zlib |
||
93 | */ |
||
94 | public function __construct($data, $destination, $single_file = false, $overwrite = false, $files_to_extract = null) |
||
134 | } |
||
135 | |||
/**
 * Entry point: validates, inflates and un-tars the archive held in $this->data
 *
 * What it does:
 *
 * - Runs the gzip signature check (only when no header has been loaded yet)
 * - Reads the gzip header, inflates the payload, then walks the tar records
 * - In single file mode returns the found file value, provided the crc check passed
 * - Otherwise flushes the package cache (when a destination is set) and
 * returns the list of processed files
 *
 * @return bool|array false on any failure, or when the single file was not found / failed crc
 */
public function read_tgz_data()
{
	// Sniff test that this is a .tgz / tar.gz file, skipped if a header is already loaded
	$has_signature = !empty($this->_header) || $this->check_valid_tgz();
	if (!$has_signature)
	{
		return false;
	}

	// Read the gzip header to find the data offset, then inflate the archive data
	if ($this->_read_header_tgz() === false || $this->_ungzip_data() === false)
	{
		return false;
	}

	// With the archive data in hand, we need to un tarball it
	$this->_process_files();

	// Looking for a single file: hand it back only if it was found and passed the crc check
	if ($this->single_file)
	{
		return ($this->_found && $this->_crc_check) ? $this->_found : false;
	}

	// Wanted many files written out, then we need to clean up
	if ($this->destination !== null)
	{
		package_flush_cache();
	}

	return $this->return;
}
||
183 | |||
/**
 * Loads the fixed 10 byte gzip header into $this->_header and checks its magic number
 *
 * @return bool true when the first two bytes are the gzip identification 0x1f 0x8b
 */
public function check_valid_tgz()
{
	// Anything with 10 or more bytes can hold a signature, so unpack it and take a look
	if (strlen($this->data) >= 10)
	{
		$this->_header = unpack('H2a/H2b/Ct/Cf/Vmtime/Cxtra/Cos', substr($this->data, 0, 10));

		// The identification bytes, as hex, must read 1f8b for gzip
		$magic = $this->_header['a'] . $this->_header['b'];

		return strcasecmp($magic, '1f8b') === 0;
	}

	// Too short to carry a signature
	return false;
}
||
203 | |||
/**
 * Reads the archive file header
 *
 * What it does:
 *
 * - validates that it is compressed with deflate
 * - processes the optional header fields so that $this->_offset ends up
 * pointing at the start of the compressed archive data
 * - extra field (skipped)
 * - archive filename (stored in $this->_header['filename'])
 * - archive comment (stored in $this->_header['comment'])
 * - header CRC (skipped)
 * - rejects headers whose optional fields are truncated or not zero terminated,
 * rather than reading past the end of the data
 *
 * Signature Definition:
 * - identification byte 1 and 2: 2 bytes, 0x1f 0x8b
 * - Compression Method: 1 byte
 * - Flags: 1 byte
 * - Last modification time Contains a POSIX timestamp, 4 bytes
 * - Compression flags (or extra flags): 1 byte
 * - Operating system, Value that indicates on which operating system file was created, 1 byte
 *
 * @return bool false if the header is not deflate or is malformed, true otherwise
 */
private function _read_header_tgz()
{
	// Compression method needs to be 8 = deflate!
	if ($this->_header['t'] !== 8)
	{
		return false;
	}

	// Each bit of this byte represents a processing flag as follows
	// 0 fTEXT, 1 fHCRC, 2 fEXTRA, 3 fNAME, 4 fCOMMENT, 5 fENCRYPT, 6-7 reserved
	$flags = $this->_header['f'];
	$length = strlen($this->data);

	// Start to read any data defined by the flags, it's the data after the 10 byte header
	$this->_offset = 10;

	// fEXTRA flag set, we simply skip over its 2 byte length entry and the data it describes
	if (($flags & 4) !== 0)
	{
		$raw_xlen = (string) substr($this->data, $this->_offset, 2);

		// A truncated header can not be unpacked
		if (strlen($raw_xlen) < 2)
		{
			return false;
		}

		$xlen = unpack('vxlen', $raw_xlen);
		$this->_offset += $xlen['xlen'] + 2;
	}

	// Read the filename and then the comment, both are zero terminated strings
	foreach ([8 => 'filename', 16 => 'comment'] as $flag => $field)
	{
		if (($flags & $flag) === 0)
		{
			continue;
		}

		// No room left for the field, so the header is truncated
		if ($this->_offset >= $length)
		{
			return false;
		}

		// Without a terminator the field is malformed, bail instead of scanning forever
		$end = strpos($this->data, "\0", $this->_offset);
		if ($end === false)
		{
			return false;
		}

		$this->_header[$field] = (string) substr($this->data, $this->_offset, $end - $this->_offset);

		// Continue from the byte after the terminator
		$this->_offset = $end + 1;
	}

	// The header CRC16 is not verified, just step over its 2 bytes
	if (($flags & 2) !== 0)
	{
		$this->_offset += 2;
	}

	return true;
}
||
276 | |||
/**
 * Inflates the compressed archive data, replacing $this->data with the tarball
 *
 * - The compressed data starts at $this->_offset and is terminated with
 * 4 bytes of CRC and 4 bytes of the original input size
 * - The trailer values are saved to $this->_crc and $this->_size
 *
 * @return bool|null false when the crc or the inflated size do not match the trailer,
 * nothing (null) when the data checks out
 */
public function _ungzip_data()
{
	$total = strlen($this->data);

	// The trailing 8 bytes hold the crc32 and the original (uncompressed) size
	$trailer = unpack('Vcrc32/Visize', substr($this->data, $total - 8));
	$this->_crc = $trailer['crc32'];
	$this->_size = $trailer['isize'];

	// Everything between the header and the trailer is the deflated tarball
	$compressed = substr($this->data, $this->_offset, $total - 8 - $this->_offset);
	$this->data = @gzinflate($compressed);

	// The crc must match what the archive claims ...
	if (!$this->_check_crc())
	{
		return false;
	}

	// ... and so must the size of what we inflated
	if (strlen($this->data) !== $trailer['isize'])
	{
		return false;
	}
}
||
297 | |||
/**
 * Compares the crc saved in the archive against one calculated from the data
 *
 * - Converts $this->_crc to its zero padded, 8 character hex form as a side effect
 *
 * @return bool true when the data is available and both crc values agree
 */
private function _check_crc()
{
	// What we calculate from the data we actually have
	$calculated = hash('crc32b', $this->data);

	// What the archive claimed, as unsigned padded hex so the two are comparable
	$claimed = str_pad(dechex($this->_crc), 8, '0', STR_PAD_LEFT);
	$this->_crc = $claimed;

	// No data (e.g. a failed inflate) can never pass
	if ($this->data === false)
	{
		return false;
	}

	return $claimed === $calculated;
}
||
309 | |||
/**
 * Walks the un-gzipped tar data record by record
 *
 * What it does
 * - Assumes it is Ustar format
 * - Tracks position in $this->_offset as a count of 512 byte blocks
 * - Writes each wanted file when a destination is set
 * - Adds every non directory entry that was not skipped to $this->return
 * - Stops early once a requested file has been found
 */
private function _process_files()
{
	// Tar files are written in 512 byte chunks
	$block_limit = strlen($this->data) / 512 - 1;
	$this->_offset = 0;

	// While we have blocks to process
	while ($this->_offset < $block_limit)
	{
		$this->_read_current_header();

		// Blank record? This is probably at the end of the file.
		// NOTE(review): the offset counts blocks, so this steps over 512 blocks - confirm that is intended
		if (empty($this->_current['filename']))
		{
			$this->_offset += 512;
			continue;
		}

		// If it is a directory, let's make sure it ends in a /
		$is_directory = $this->_current['type'] == 5;
		if ($is_directory && substr($this->_current['filename'], -1) !== '/')
		{
			$this->_current['filename'] .= '/';
		}

		// Figure out what we will do with the data once we have it
		$this->_determine_write_this();

		// Step past the header block, read the file data, then move to the following 512 block
		$data_blocks = ceil($this->_current['size'] / 512);
		$this->_offset++;
		$this->_current['data'] = substr($this->data, $this->_offset << 9, $this->_current['size']);
		$this->_offset += $data_blocks;

		// We can write this file or return its data or ...
		$should_write = $this->destination !== null && $this->_write_this;
		if ($should_write)
		{
			$this->_write_this_file();

			// Skipped files are not reported
			if ($this->_skip)
			{
				continue;
			}

			// Found what was requested, nothing more to do
			if ($this->_found)
			{
				return;
			}
		}

		// Directories are not listed in the results
		if (substr($this->_current['filename'], -1) === '/')
		{
			continue;
		}

		$this->return[] = [
			'filename' => $this->_current['filename'],
			'md5' => md5($this->_current['data']),
			'preview' => substr($this->_current['data'], 0, 100),
			'size' => $this->_current['size'],
			'formatted_size' => byte_format($this->_current['size']),
			'skipped' => false,
			'crc' => $this->_crc_check,
		];
	}
}
||
378 | |||
/**
 * Reads the tar header block at the current offset into $this->_current
 *
 * - The raw 512 byte block is kept in $this->_header
 * - All fields are trimmed, the numeric ones are converted from octal to decimal
 *
 * Signature Definition:
 * - char filename[100]; File name
 * - char mode[8]; File mode
 * - char uid[8]; Owner's numeric user ID
 * - char gid[8]; Group's numeric user ID
 * - char size[12]; File size in bytes (octal base)
 * - char mtime[12]; Last modification time in numeric Unix time format (octal)
 * - char checksum[8]; Checksum for header record
 * - char type[1]; Link indicator (file type 0=normal, 1=hard, 2=symlink ... 5=directory ...
 * - char linkname[100]; Name of linked file
 * - char magic[6]; UStar indicator "ustar"
 * - char version[2]; UStar version "00"
 * - char uname[32]; Owner user name
 * - char gname[32]; Owner group name
 * - char devmajor[8]; Device major number
 * - char devminor[8]; Device minor number
 * - char path[155]; Filename prefix
 */
private function _read_current_header()
{
	// The fields that hold octal numbers
	$octal_fields = ['mode', 'uid', 'gid', 'size', 'mtime', 'checksum', 'type'];

	// The unpack format, one entry per header field in the order they are stored
	$layout = implode('/', [
		'a100filename',
		'a8mode',
		'a8uid',
		'a8gid',
		'a12size',
		'a12mtime',
		'a8checksum',
		'a1type',
		'a100linkname',
		'a6magic',
		'a2version',
		'a32uname',
		'a32gname',
		'a8devmajor',
		'a8devminor',
		'a155path',
	]);

	// Each file object is preceded by a 512-byte header record on 512 boundaries
	$this->_header = substr($this->data, $this->_offset << 9, 512);
	$this->_current = unpack($layout, $this->_header);

	// Clean the header fields, convert octal to decimal as needed
	foreach ($this->_current as $field => $raw)
	{
		$clean = trim($raw);

		// Text fields only need the trim
		if (!in_array($field, $octal_fields))
		{
			$this->_current[$field] = $clean;
			continue;
		}

		if (!empty($clean) && is_numeric($clean))
		{
			// A last digit of 8 or 9 is not valid octal, so the value is run through decoct first
			if (($clean % 10) >= 8)
			{
				$clean = decoct($clean);
			}

			$clean = octdec($clean);
		}

		$this->_current[$field] = $clean;
	}
}
||
434 | |||
435 | /** |
||
436 | * Does what it says, determines if we are writing this file or not |
||
437 | */ |
||
438 | private function _determine_write_this() |
||
466 | } |
||
467 | } |
||
468 | |||
469 | /** |
||
470 | * Does the actual writing of the file |
||
471 | * |
||
472 | * - Writes the extracted file to disk or if we are extracting a single file |
||
473 | * - it returns the extracted data |
||
474 | */ |
||
475 | private function _write_this_file() |
||
520 | } |
||
521 | |||
522 | /** |
||
523 | * Checks the saved vs calculated crc values |
||
524 | */ |
||
525 | private function _check_header_crc() |
||
543 | } |
||
544 | } |
||
545 |
The issue could also be caused by a filter entry in the build configuration. If the path has been excluded in your configuration, e.g.
excluded_paths: ["lib/*"]
you can move it to the dependency path list as follows:
dependency_paths: ["lib/*"]
For further information see https://scrutinizer-ci.com/docs/tools/php/php-scrutinizer/#list-dependency-paths