UnTgz   F
last analyzed

Complexity

Total Complexity 70

Size/Duplication

Total Lines 510
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 158
dl 0
loc 510
rs 2.8
c 0
b 0
f 0
wmc 70

11 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 40 5
B _determine_write_this() 0 28 8
B _process_files() 0 58 10
A _ungzip_data() 0 14 3
B read_tgz_data() 0 41 11
C _write_this_file() 0 45 12
A _check_crc() 0 7 2
A _check_header_crc() 0 18 3
A check_valid_tgz() 0 13 2
A _read_current_header() 0 31 6
B _read_header_tgz() 0 50 8

How to fix   Complexity   

Complex Class

Complex classes like UnTgz often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use UnTgz, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
/**
4
 * Class to unTgz a file (tar -xzvf)
5
 *
6
 * @package   ElkArte Forum
7
 * @copyright ElkArte Forum contributors
8
 * @license   BSD http://opensource.org/licenses/BSD-3-Clause (see accompanying LICENSE.txt file)
9
 *
10
 * @version 2.0 dev
11
 *
12
 */
13
14
namespace ElkArte\Helper;
15
16
use ElkArte\Languages\Txt;
17
18
/**
 * Utility class to un gzip + un tar package files
 *
 * What it does:
 *
 * - requires zlib support be built into PHP.
 *
 * if destination is null
 * - nothing is written, read_tgz_data() returns a list of the files in the archive.
 *
 * if single_file is true
 * - read_tgz_data() returns the contents of the file specified by destination, if it
 *   exists in the archive, or false.
 * - destination can start with * and / to signify that the file may come from any directory.
 * - destination should not begin with a / if single_file is true.
 *
 * otherwise the archive is extracted below destination
 * - creates the destination directory if it doesn't exist.
 * - an existing file is replaced when overwrite is true, or when the archived copy has
 *   a newer modification time than the one on disk.
 * - ".." path segments are removed from entry names so nothing is written above destination.
 * - returns an array of the files processed on success.
 */
class UnTgz
{
	/** @var array Holds the return array of files processed */
	protected $return = [];

	/** @var array Holds the data found in each tar file header block */
	protected $_current = [];

	/** @var int Holds the file pointer, a byte offset while reading the gzip header, a 512 block number while un-tarring */
	protected $_offset = 0;

	/** @var bool If the last tar entry header checked passed or failed its checksum */
	protected $_crc_check = false;

	/** @var string|int The current crc value of the data */
	protected $_crc;

	/** @var int The claimed size of the data in the tarball */
	protected $_size;

	/** @var bool If we are going to write out the files processed */
	protected $_write_this = false;

	/** @var bool If to skip a file we found */
	protected $_skip = false;

	/** @var bool|string False until the requested single file is found, then its contents */
	protected $_found = false;

	/** @var array|string The unpacked gzip signature (array), later the raw 512 byte tar header (string) */
	protected $_header = [];

	/** @var null|string[] Array of file names we want to extract from the archive */
	protected $files_to_extract;

	/** @var FileFunctions The file functions class */
	protected $fileFunc;

	/** @var string Holds the data string passed to the function */
	protected $data;

	/** @var string Location to write the files. */
	protected $destination;

	/** @var bool|string If we are looking for a single specific file */
	protected $single_file;

	/** @var bool If we can overwrite a file with the same name in the destination */
	protected $overwrite;

	/**
	 * Class initialization, passes variables, loads dependencies
	 *
	 * @param string $data the raw bytes of the .tgz / .tar.gz archive
	 * @param string|null $destination directory to extract to, file to look for, or null to list
	 * @param bool|string $single_file
	 * @param bool $overwrite
	 * @param null|string[] $files_to_extract
	 *
	 * @throws \ElkArte\Exceptions\Exception package_no_zlib
	 */
	public function __construct($data, $destination, $single_file = false, $overwrite = false, $files_to_extract = null)
	{
		// Load the passed commands in to the class
		$this->data = $data;
		$this->destination = $destination;
		$this->single_file = $single_file;
		$this->overwrite = $overwrite;
		$this->files_to_extract = $files_to_extract;

		// This class sorta needs gzinflate!
		if (!function_exists('gzinflate'))
		{
			// Fully qualified: a relative "Exceptions\Exception" resolves below ElkArte\Helper,
			// where no such class was found.
			// NOTE(review): assumes the core exception class is \ElkArte\Exceptions\Exception - confirm
			throw new \ElkArte\Exceptions\Exception('package_no_zlib', 'critical');
		}

		// Make sure we have this loaded.
		Txt::load('Packages');

		// Likely to need this
		require_once(SUBSDIR . '/Package.subs.php');
		$this->fileFunc = FileFunctions::instance();

		// The destination needs exist, and be writable, or we are doomed
		umask(0);

		// Only a full extraction writes below destination, so only then does it have to exist
		$extracting = $this->destination !== null && !$this->single_file;
		if ($extracting && !$this->fileFunc->fileExists($this->destination))
		{
			mktree($this->destination);
		}
	}

	/**
	 * Class controller, calls the ungzip / untar functions in required order
	 *
	 * The archive data is replaced by its inflated form while running, so an instance
	 * can process its archive once; a further call returns false.
	 *
	 * @return bool|array|string false on failure (or file not found), the file contents
	 * in single file mode, otherwise the array of files processed
	 */
	public function read_tgz_data()
	{
		// Snif test that this is a .tgz tar.gz file, unless the signature was already loaded
		$signature_loaded = is_array($this->_header) && isset($this->_header['t'], $this->_header['f']);
		if (!$signature_loaded && !$this->check_valid_tgz())
		{
			return false;
		}

		// The tgz information for this archive
		if ($this->_read_header_tgz() === false)
		{
			return false;
		}

		// With the offset found, read and deflate the archive data
		if ($this->_ungzip_data() === false)
		{
			return false;
		}

		// With the archive data in hand, we need to un tarball it
		$this->_process_files();

		// Looking for a single file, return it only if we found it and its header checked out.
		// Compared strictly so an empty file ('') still counts as found.
		if ($this->single_file)
		{
			return ($this->_found !== false && $this->_crc_check) ? $this->_found : false;
		}

		// Wanted many files then we need to clean up
		if ($this->destination !== null)
		{
			package_flush_cache();
		}

		return $this->return;
	}

	/**
	 * Loads the 10 byte header and validates its a tgz file
	 *
	 * @return bool
	 */
	public function check_valid_tgz()
	{
		// No signature?
		if (!is_string($this->data) || strlen($this->data) < 10)
		{
			return false;
		}

		// Unpack the 10 byte signature so we can see what we have
		$this->_header = unpack('H2a/H2b/Ct/Cf/Vmtime/Cxtra/Cos', substr($this->data, 0, 10));

		return $this->_has_gzip_magic();
	}

	/**
	 * Checks the unpacked signature for the gzip IDentification number, it must be 1f8b
	 *
	 * @return bool
	 */
	private function _has_gzip_magic()
	{
		return is_array($this->_header)
			&& isset($this->_header['a'], $this->_header['b'])
			&& strtolower($this->_header['a'] . $this->_header['b']) === '1f8b';
	}

	/**
	 * Reads the archive file header
	 *
	 * What it does:
	 *
	 * - validates that the file is a tar.gz
	 * - validates that it is compressed with deflate
	 * - processes header information such that we can set the start of archive data
	 *    - extra field
	 *    - archive filename
	 *    - archive comment
	 *    - header CRC
	 * - never reads beyond the end of the data, a truncated header is a failure
	 *
	 * Signature Definition:
	 * - identification byte 1 and 2: 2 bytes, 0x1f 0x8b
	 * - Compression Method: 1 byte
	 * - Flags: 1 byte
	 * - Last modification time Contains a POSIX timestamp, 4 bytes
	 * - Compression flags (or extra flags): 1 byte
	 * - Operating system, Value that indicates on which operating system file was created, 1 byte
	 *
	 * @return bool true when $_offset points at the start of the compressed data
	 */
	private function _read_header_tgz()
	{
		// Must be gzip, and the compression method needs to be 8 = deflate!
		if (!$this->_has_gzip_magic() || $this->_header['t'] !== 8)
		{
			return false;
		}

		// Each bit of this byte represents a processing flag as follows
		// 0 fTEXT, 1 fHCRC, 2 fEXTRA, 3 fNAME, 4 fCOMMENT, 5 fENCRYPT, 6-7 reserved
		$flags = $this->_header['f'];
		$length = strlen($this->data);

		// Start to read any data defined by the flags, its the data after the 10 byte header
		$this->_offset = 10;

		// fEXTRA flag set we simply skip over its entry and the length of its data
		if (($flags & 4) !== 0)
		{
			if ($this->_offset + 2 > $length)
			{
				return false;
			}

			$xlen = unpack('vxlen', substr($this->data, $this->_offset, 2));
			$this->_offset += $xlen['xlen'] + 2;
		}

		// Read the filename, its zero terminated
		if (($flags & 8) !== 0)
		{
			$filename = $this->_read_zero_terminated();
			if ($filename === false)
			{
				return false;
			}

			$this->_header['filename'] = $filename;
		}

		// Read the comment, its also zero terminated
		if (($flags & 16) !== 0)
		{
			$comment = $this->_read_zero_terminated();
			if ($comment === false)
			{
				return false;
			}

			$this->_header['comment'] = $comment;
		}

		// "Read" the header CRC, its 2 bytes that we simply step over
		if (($flags & 2) !== 0)
		{
			$this->_offset += 2;
		}

		// What follows has to at least hold the 8 byte trailer
		return $this->_offset + 8 <= $length;
	}

	/**
	 * Reads a zero terminated string starting at the current offset and moves the
	 * offset to the byte after the terminator
	 *
	 * @return string|bool the string (may be empty), or false if no terminator exists
	 */
	private function _read_zero_terminated()
	{
		if ($this->_offset >= strlen($this->data))
		{
			return false;
		}

		// Without a terminator there is no end to find, walking on would never stop
		$end = strpos($this->data, "\0", $this->_offset);
		if ($end === false)
		{
			return false;
		}

		$value = substr($this->data, $this->_offset, $end - $this->_offset);
		$this->_offset = $end + 1;

		return $value;
	}

	/**
	 * We now know where the start of the compressed data is in the archive
	 * The data is terminated with 4 bytes of CRC and 4 bytes of the original input size
	 *
	 * On success $data holds the inflated tarball.
	 *
	 * @return bool false if the data can not be inflated or fails the crc / size check
	 */
	public function _ungzip_data()
	{
		// Need room for the compressed stream and its 8 byte trailer
		$length = is_string($this->data) ? strlen($this->data) : 0;
		if ($length < $this->_offset + 8)
		{
			return false;
		}

		// Unpack the crc and original size, its the trailing 8 bytes
		$check = unpack('Vcrc32/Visize', substr($this->data, -8));
		$this->_crc = $check['crc32'];
		$this->_size = $check['isize'];

		// Extract the data, in this case its the tarball.  Bad data raises a warning
		// as well as returning false, the return value is what we act on.
		$inflated = @gzinflate(substr($this->data, $this->_offset, $length - 8 - $this->_offset));
		if ($inflated === false)
		{
			return false;
		}

		$this->data = $inflated;

		// Check the crc and the data size
		return $this->_check_crc() && strlen($this->data) === $check['isize'];
	}

	/**
	 * Checks the saved vs calculated crc values
	 *
	 * Leaves $_crc as the 8 character hex form of the saved value.
	 *
	 * @return bool
	 */
	private function _check_crc()
	{
		if (!is_string($this->data))
		{
			return false;
		}

		// Make sure we have unsigned crc padded hex, converting only once
		if (is_int($this->_crc))
		{
			$this->_crc = str_pad(dechex($this->_crc), 8, '0', STR_PAD_LEFT);
		}

		return $this->_crc === hash('crc32b', $this->data);
	}

	/**
	 * Does the work of un tarballing the now ungzip'ed tar file
	 *
	 * What it does
	 * - Assumes its Ustar format
	 * - Walks the 512 byte blocks, each entry is a header block followed by its data blocks
	 * - Writes, returns or lists each entry as determined by the class settings
	 */
	private function _process_files()
	{
		// Tar files are written in 512 byte chunks
		$blocks = strlen($this->data) / 512 - 1;
		$this->_offset = 0;

		// While we have blocks to process
		while ($this->_offset < $blocks)
		{
			$this->_read_current_header();

			// Blank record?  This is probably at the end of the file, step over this one block.
			// Compared to '' so a file named "0" is not mistaken for a blank record.
			if ($this->_current['filename'] === '')
			{
				$this->_offset++;
				continue;
			}

			// If its a directory, lets make sure it ends in a /
			if ($this->_current['type'] === 5 && substr($this->_current['filename'], -1) !== '/')
			{
				$this->_current['filename'] .= '/';
			}

			// Figure out what we will do with the data once we have it
			$this->_determine_write_this();

			// Read the files data, move the offset to the start of the following 512 block
			$size = $this->_current['size'];
			$this->_current['data'] = (string) substr($this->data, ++$this->_offset << 9, $size);
			$this->_offset += (int) ceil($size / 512);

			// Nothing left of the name once made safe, there is nothing we can do with this one
			if ($this->_current['filename'] === '')
			{
				continue;
			}

			// We can write this file or return its data or ...
			if ($this->_write_this && $this->destination !== null)
			{
				$this->_write_this_file();

				if ($this->_skip)
				{
					continue;
				}

				// The single file we were after, even if it is an empty one
				if ($this->_found !== false)
				{
					return;
				}
			}

			if (substr($this->_current['filename'], -1) !== '/')
			{
				$this->return[] = [
					'filename' => $this->_current['filename'],
					'md5' => md5($this->_current['data']),
					'preview' => substr($this->_current['data'], 0, 100),
					'size' => $this->_current['size'],
					'formatted_size' => byte_format($this->_current['size']),
					'skipped' => false,
					'crc' => $this->_crc_check,
				];
			}
		}
	}

	/**
	 * Reads the tar file header block, its a 512 block and contains the following:
	 *
	 * Signature Definition:
	 * - char filename[100]; File name
	 * - char mode[8]; File mode
	 * - char uid[8]; Owner's numeric user ID
	 * - char gid[8]; Group's numeric user ID
	 * - char size[12]; File size in bytes (octal base)
	 * - char mtime[12]; Last modification time in numeric Unix time format (octal)
	 * - char checksum[8]; Checksum for header record
	 * - char type[1]; Link indicator (file type 0=normal, 1=hard, 2=symlink ... 5=directory ...
	 * - char linkname[100]; Name of linked file
	 * - char magic[6]; UStar indicator "ustar"
	 * - char version[2]; UStar version "00"
	 * - char uname[32]; Owner user name
	 * - char gname[32]; Owner group name
	 * - char devmajor[8]; Device major number
	 * - char devminor[8]; Device minor number
	 * - char path[155]; Filename prefix
	 *
	 * The result is stored in $_current, numeric fields as integers.
	 */
	private function _read_current_header()
	{
		$numeric = ['mode', 'uid', 'gid', 'size', 'mtime', 'checksum', 'type'];

		// Each file object is preceded by a 512-byte header record on 512 boundaries
		$this->_header = str_pad((string) substr($this->data, $this->_offset << 9, 512), 512, "\0");

		// Unpack
		$fields = unpack('a100filename/a8mode/a8uid/a8gid/a12size/a12mtime/a8checksum/a1type/a100linkname/a6magic/a2version/a32uname/a32gname/a8devmajor/a8devminor/a155path', $this->_header);

		// Clean the header fields, convert octal to decimal as needed
		$this->_current = [];
		foreach ($fields as $key => $value)
		{
			$value = trim($value);
			if (in_array($key, $numeric, true))
			{
				// The type flag may be a letter, keep those as found
				$value = $this->_header_number($value, $key === 'type');
			}

			$this->_current[$key] = $value;
		}

		// A POSIX ustar header (magic "ustar", version "00") carries the leading part of
		// a long name in the prefix field
		if ($this->_current['filename'] !== ''
			&& $this->_current['path'] !== ''
			&& $this->_current['magic'] === 'ustar'
			&& $this->_current['version'] === '00')
		{
			$this->_current['filename'] = rtrim($this->_current['path'], '/') . '/' . $this->_current['filename'];
		}
	}

	/**
	 * Converts the text of a numeric header field to an integer
	 *
	 * @param string $value the trimmed field text
	 * @param bool $keep_text return anything that is not all digits unchanged, instead of 0
	 *
	 * @return int|string
	 */
	private function _header_number($value, $keep_text = false)
	{
		// Blank, or something other than digits
		if ($value === '' || strspn($value, '0123456789') !== strlen($value))
		{
			return $keep_text ? $value : 0;
		}

		// 8 and 9 are not octal digits, so a value holding them was written as decimal
		if (strpbrk($value, '89') !== false)
		{
			return (int) $value;
		}

		return (int) octdec($value);
	}

	/**
	 * Does what it says, determines if we are writing this file or not
	 *
	 * - When listing (destination is null) nothing is written
	 * - When looking for a single file every file is a candidate, its data is
	 *   returned and never written, so the disk is not consulted
	 * - When extracting, the name is made safe, directories are created here, and a
	 *   file is written if it is missing, overwrite is on, or the archived copy is newer
	 */
	private function _determine_write_this()
	{
		$this->_write_this = false;

		// Just listing what is in the archive
		if ($this->destination === null)
		{
			return;
		}

		$is_directory = substr($this->_current['filename'], -1) === '/';

		// Here destination is the name of the file wanted, not a place on disk
		if ($this->single_file)
		{
			$this->_write_this = !$is_directory;

			return;
		}

		// Protect from parent directory writing, for files as well as folders
		$this->_current['filename'] = $this->_strip_parent_references($this->_current['filename']);
		if (trim($this->_current['filename'], '/') === '')
		{
			// Nothing usable remains, flag the entry so it is passed over
			$this->_current['filename'] = '';

			return;
		}

		$target = $this->destination . '/' . $this->_current['filename'];
		$exists = $this->fileFunc->fileExists($target);

		// Folder... create.
		if ($is_directory)
		{
			if (!$exists)
			{
				mktree($target);
			}

			return;
		}

		// Doesn't exist already, or we may replace it, or ours is newer
		$this->_write_this = !$exists || $this->overwrite || filemtime($target) < $this->_current['mtime'];
	}

	/**
	 * Removes every ".." segment from an archive path
	 *
	 * Works on whole segments, so it can not be fooled by sequences such as "....//"
	 * that turn in to "../" once an inner "../" has been cut out.
	 *
	 * @param string $name
	 *
	 * @return string
	 */
	private function _strip_parent_references($name)
	{
		$safe = [];
		foreach (explode('/', $name) as $segment)
		{
			// A backslash also separates directories on Windows, so look inside the segment too
			if (in_array('..', explode('\\', $segment), true))
			{
				continue;
			}

			$safe[] = $segment;
		}

		return implode('/', $safe);
	}

	/**
	 * Does the actual writing of the file
	 *
	 * - Writes the extracted file to disk or if we are extracting a single file
	 * - it sets $_found to the extracted data
	 * - Sets $_skip when the entry is not one that was asked for
	 */
	private function _write_this_file()
	{
		$this->_skip = false;
		$this->_found = false;

		// Check the header first so $_crc_check always describes this entry, the
		// single file result depends on it
		$header_ok = $this->_check_header_crc();

		if ($this->single_file)
		{
			// Is this the file we're looking for?
			$wanted = $this->destination === $this->_current['filename']
				|| $this->destination === '*/' . basename($this->_current['filename']);

			if ($wanted)
			{
				$this->_found = $this->_current['data'];
			}
			// If we're looking for another file, keep going.
			else
			{
				$this->_skip = true;
			}

			return;
		}

		// A directory may need to be created
		if (strpos($this->_current['filename'], '/') !== false)
		{
			mktree($this->destination . '/' . dirname($this->_current['filename']));
		}

		// Looking for restricted files?
		if ($this->files_to_extract !== null && !in_array($this->_current['filename'], $this->files_to_extract, true))
		{
			$this->_skip = true;

			return;
		}

		// Write it out then
		if ($header_ok)
		{
			package_put_contents($this->destination . '/' . $this->_current['filename'], $this->_current['data']);
		}
	}

	/**
	 * Checks the saved vs calculated checksum of the current tar header
	 *
	 * The checksum is the sum of the 512 header bytes, with the 8 bytes of the checksum
	 * field itself (148-155) counted as spaces.  The result is kept in $_crc_check.
	 *
	 * @return bool
	 */
	private function _check_header_crc()
	{
		$header = is_string($this->_header) ? $this->_header : '';
		$header = substr(str_pad($header, 512, "\0"), 0, 512);

		// Build the checksum for this header and make sure it matches what it claims
		$this->_crc = array_sum(unpack('C*', substr_replace($header, '        ', 148, 8)));

		$this->_crc_check = isset($this->_current['checksum']) && $this->_current['checksum'] === $this->_crc;

		return $this->_crc_check;
	}
}
545