Completed
Push — patch_1-1-4 ( 3f780f...826343 )
by Emanuele
25:17 queued 11:40
created

UnTgz::_check_header_crc()   A

Complexity

Conditions 3
Paths 4

Size

Total Lines 14

Duplication

Lines 4
Ratio 28.57 %

Code Coverage

Tests 0
CRAP Score 12

Importance

Changes 0
Metric Value
cc 3
nc 4
nop 0
dl 4
loc 14
rs 9.7998
c 0
b 0
f 0
ccs 0
cts 10
cp 0
crap 12
1
<?php
2
3
/**
4
 * Class to unTgz a file (tar -xvf)
5
 *
6
 * @name      ElkArte Forum
7
 * @copyright ElkArte Forum contributors
8
 * @license   BSD http://opensource.org/licenses/BSD-3-Clause
9
 *
10
 * @version 1.1
11
 *
12
 */
13
14
/**
15
 * Utility class to un gzip + un tar package files
16
 *
17
 * if destination is null
18
 * - returns a list of files in the archive.
19
 *
20
 * if single_file is true
21
 * - returns the contents of the file specified by destination, if it exists, or false.
22
 * - destination can start with * and / to signify that the file may come from any directory.
23
 * - destination should not begin with a / if single_file is true.
24
 * - overwrites existing files with newer modification times if and only if overwrite is true.
25
 * - creates the destination directory if it doesn't exist, and one is specified.
26
 * - requires zlib support be built into PHP.
27
 * - returns an array of the files extracted on success
28
 */
29
class UnTgz
30
{
31
	/**
32
	 * Holds the return array of files processed
33
	 * @var mixed[]
34
	 */
35
	protected $return = array();
36
37
	/**
38
	 * Holds the data found in each tar file header block
39
	 * @var mixed[]
40
	 */
41
	protected $_current = array();
42
43
	/**
44
	 * Holds the file pointer, generally to the 512 block we are working on
45
	 * @var int
46
	 */
47
	protected $_offset = 0;
48
49
	/**
50
	 * If the file passes or fails crc check
51
	 * @var boolean
52
	 */
53
	protected $_crc_check = false;
54
55
	/**
56
	 * The current crc value of the data
57
	 * @var string|int
58
	 */
59
	protected $_crc;
60
61
	/**
62
	 * The claimed size of the data in the tarball
63
	 * @var int
64
	 */
65
	protected $_size;
66
67
	/**
68
	 * If we are going to write out the files processed
69
	 * @var boolean
70
	 */
71
	protected $_write_this = false;
72
73
	/**
74
	 * If we will skip a file we found
75
	 * @var boolean
76
	 */
77
	protected $_skip = false;
78
79
	/**
80
	 * If we found a file that was requested ($files_to_extract)
81
	 * @var boolean
82
	 */
83
	protected $_found = false;
84
85
	/**
86
	 * Current file header we are working on
87
	 * @var mixed[]|string
88
	 */
89
	protected $_header = array();
90
91
	/**
92
	 * Array of file names we want to extract from the archive
93
	 * @var null|string[]
94
	 */
95
	protected $files_to_extract;
96
97
	/**
98
	 * Holds the data string passed to the function
99
	 * @var string
100
	 */
101
	protected $data;
102
103
	/**
104
	 * Location to write the files.
105
	 * @var string
106
	 */
107
	protected $destination;
108
109
	/**
110
	 * If we are looking for a single specific file
111
	 * @var boolean|string
112
	 */
113
	protected $single_file;
114
115
	/**
116
	 * If we can overwrite a file with the same name in the destination
117
	 * @var boolean
118
	 */
119
	protected $overwrite;
120
121
	/**
	 * Stores the extraction options and gets the environment ready
	 *
	 * What it does:
	 *
	 * - Copies the supplied arguments on to the instance
	 * - Refuses to continue when gzinflate() is not available
	 * - Loads the Packages language file and Package.subs.php
	 * - Clears the umask and, for a full extraction, creates the destination
	 * directory when it does not exist yet
	 *
	 * @param string        $data the tar.gz data to work on
	 * @param string        $destination where to write files, null to only list the archive,
	 *                      or the file name to look for when $single_file is set
	 * @param bool|string   $single_file if only one specific file is wanted
	 * @param bool          $overwrite if existing files may be replaced
	 * @param null|string[] $files_to_extract restrict the extraction to these archive names
	 *
	 * @throws Elk_Exception package_no_zlib
	 */
	public function __construct($data, $destination, $single_file = false, $overwrite = false, $files_to_extract = null)
	{
		// Keep hold of everything we were asked to do
		$this->data = $data;
		$this->destination = $destination;
		$this->single_file = $single_file;
		$this->overwrite = $overwrite;
		$this->files_to_extract = $files_to_extract;

		// Without zlib there is nothing we can do
		if (function_exists('gzinflate') === false)
		{
			throw new Elk_Exception('package_no_zlib', 'critical');
		}

		// Language strings and the package helper functions
		loadLanguage('Packages');
		require_once(SUBSDIR . '/Package.subs.php');

		// No mask, so the modes we ask for are the modes we get
		umask(0);

		// A full extraction needs somewhere to put the files
		if ($this->destination !== null)
		{
			$missing = !file_exists($this->destination);

			if ($missing && !$this->single_file)
			{
				mktree($this->destination, 0777);
			}
		}
	}
156
157
	/**
	 * Class controller, runs the gzip and tar stages in the order they are needed
	 *
	 * What it does:
	 *
	 * - Returns false as soon as the signature, gzip header or inflate stage fails
	 * - In single file mode returns the data of the requested file, or false when
	 * it was not found or its tar header checksum did not verify
	 * - Otherwise returns the list of entries that were processed
	 *
	 * @return boolean|array|string
	 */
	public function read_tgz_data()
	{
		// Only sniff the signature when no header has been loaded yet
		if (empty($this->_header))
		{
			if ($this->check_valid_tgz() === false)
			{
				return false;
			}
		}

		// Locate the start of the compressed data, then inflate it
		if ($this->_read_header_tgz() === false || $this->_ungzip_data() === false)
		{
			return false;
		}

		// What we hold now is the tarball, walk through its entries
		$this->_process_files();

		// One file wanted: hand back its data, but only if it was found and verified
		if ($this->single_file)
		{
			if ($this->_found && $this->_crc_check)
			{
				return $this->_found;
			}

			return false;
		}

		// Many files written to disk, so the package cache needs flushing
		if ($this->destination !== null)
		{
			package_flush_cache();
		}

		return $this->return;
	}
192
193
	/**
	 * Loads the 10 byte gzip signature and validates it carries the gzip ID bytes
	 *
	 * - The unpacked signature is kept in $this->_header for the later stages
	 *
	 * @return boolean true when the first two bytes are 0x1f 0x8b
	 */
	public function check_valid_tgz()
	{
		$signature_length = 10;

		// Too short to even hold a signature
		if (strlen($this->data) < $signature_length)
		{
			return false;
		}

		// ID1, ID2, method, flags, mtime, extra flags, os
		$signature = substr($this->data, 0, $signature_length);
		$this->_header = unpack('H2a/H2b/Ct/Cf/Vmtime/Cxtra/Cos', $signature);

		// The two identification bytes, as hex, must read 1f8b
		$magic = $this->_header['a'] . $this->_header['b'];

		return strtolower($magic) === '1f8b';
	}
210
211
	/**
	 * Reads the archive file header
	 *
	 * What it does:
	 *
	 * - validates that it is compressed with deflate
	 * - processes the optional header fields so we can set the start of archive data
	 *    - extra field (skipped)
	 *    - archive filename
	 *    - archive comment
	 *    - header CRC (skipped)
	 * - rejects headers whose optional fields run past the end of the data, rather
	 * than reading beyond the end of the string looking for a terminator
	 *
	 * Signature Definition:
	 * - identification byte 1 and 2: 2 bytes, 0x1f 0x8b
	 * - Compression Method: 1 byte
	 * - Flags: 1 byte
	 * - Last modification time Contains a POSIX timestamp, 4 bytes
	 * - Compression flags (or extra flags): 1 byte
	 * - Operating system, Value that indicates on which operating system file was created, 1 byte
	 *
	 * @return boolean false if the header is unsupported or malformed, true otherwise
	 */
	private function _read_header_tgz()
	{
		// Compression method needs to be 8 = deflate!
		if ($this->_header['t'] !== 8)
			return false;

		// Each bit of this byte represents a processing flag as follows
		// 0 fTEXT, 1 fHCRC, 2 fEXTRA, 3 fNAME, 4 fCOMMENT, 5 fENCRYPT, 6-7 reserved
		$flags = $this->_header['f'];
		$length = strlen($this->data);

		// Start to read any data defined by the flags, its the data after the 10 byte header
		$this->_offset = 10;

		// fEXTRA flag set we simply skip over its entry and the length of its data
		if ($flags & 4)
		{
			// The two length bytes must actually be there
			if ($this->_offset + 2 > $length)
				return false;

			$xlen = unpack('vxlen', substr($this->data, $this->_offset, 2));
			$this->_offset += $xlen['xlen'] + 2;
		}

		// The filename and then the comment, both are zero terminated strings
		foreach (array(8 => 'filename', 16 => 'comment') as $flag => $field)
		{
			if (!($flags & $flag))
				continue;

			// Find the terminator inside the data, a missing one means a damaged header
			$end = $this->_offset < $length ? strpos($this->data, "\0", $this->_offset) : false;
			if ($end === false)
				return false;

			$this->_header[$field] = substr($this->data, $this->_offset, $end - $this->_offset);
			$this->_offset = $end + 1;
		}

		// "Read" the header CRC, its two bytes that we do not validate
		if ($flags & 2)
			$this->_offset += 2;

		// The compressed data must start before the 8 byte crc/size trailer
		if ($this->_offset > $length - 8)
			return false;

		return true;
	}
273
274
	/**
	 * Inflates the compressed data that starts at the current offset
	 *
	 * - The data is terminated with 4 bytes of CRC and 4 bytes of the original input size
	 * - On return $this->data holds the inflated data (the tarball), or false if inflate failed
	 *
	 * @return null|boolean false when the crc or size does not match, null otherwise
	 */
	public function _ungzip_data()
	{
		$total = strlen($this->data);

		// The trailing 8 bytes carry the crc and the size of the original data
		$trailer = unpack('Vcrc32/Visize', substr($this->data, $total - 8));
		$this->_crc = $trailer['crc32'];
		$this->_size = $trailer['isize'];

		// Everything between the header and the trailer is the deflate stream
		$compressed = substr($this->data, $this->_offset, $total - 8 - $this->_offset);
		$this->data = @gzinflate($compressed);

		// Good crc and the size we were promised?
		if ($this->_check_crc() && strlen($this->data) === $trailer['isize'])
		{
			return null;
		}

		return false;
	}
292
293
	/**
	 * Walks the inflated tarball one entry at a time
	 *
	 * What it does
	 * - Assumes its Ustar format
	 * - $this->_offset counts 512 byte blocks here, not bytes
	 * - Entries are written out, returned as the single requested file, or added
	 * to the list in $this->return
	 */
	private function _process_files()
	{
		// Tar data comes in 512 byte blocks, work out how far we can go
		$this->_offset = 0;
		$limit = strlen($this->data) / 512 - 1;

		while ($this->_offset < $limit)
		{
			$this->_read_current_header();

			// Blank record?  This is probably at the end of the file.
			// NOTE(review): this advances by 512 blocks, not a single block - confirm that is intended
			if (empty($this->_current['filename']))
			{
				$this->_offset += 512;
				continue;
			}

			// Directory entries always carry a trailing /
			$is_directory_type = $this->_current['type'] == 5;
			if ($is_directory_type && substr($this->_current['filename'], -1) !== '/')
			{
				$this->_current['filename'] .= '/';
			}

			// Decide what happens to the data before we read it
			$this->_determine_write_this();

			// The data starts in the block after the header, then skip over the blocks it fills
			++$this->_offset;
			$this->_current['data'] = substr($this->data, $this->_offset << 9, $this->_current['size']);
			$this->_offset += ceil($this->_current['size'] / 512);

			// Write it, or hand it back if it is the one file we are after
			if ($this->_write_this && $this->destination !== null)
			{
				$this->_write_this_file();

				if ($this->_skip)
				{
					continue;
				}

				if ($this->_found)
				{
					return;
				}
			}

			// Directories are not reported, only files
			if (substr($this->_current['filename'], -1) === '/')
			{
				continue;
			}

			$this->return[] = array(
				'filename' => $this->_current['filename'],
				'md5' => md5($this->_current['data']),
				'preview' => substr($this->_current['data'], 0, 100),
				'size' => $this->_current['size'],
				'skipped' => false,
				'crc' => $this->_crc_check,
			);
		}
	}
354
355
	/**
	 * Loads the 512 byte tar header block found at the current block offset
	 *
	 * - The raw block is kept in $this->_header, the parsed fields in $this->_current
	 * - Every field is trimmed, the numeric ones are converted from octal
	 *
	 * Signature Definition:
	 * - char filename[100]; File name
	 * - char mode[8]; File mode
	 * - char uid[8]; Owner's numeric user ID
	 * - char gid[8]; Group's numeric user ID
	 * - char size[12]; File size in bytes (octal base)
	 * - char mtime[12]; Last modification time in numeric Unix time format (octal)
	 * - char checksum[8]; Checksum for header record
	 * - char type[1]; Link indicator (file type 0=normal, 1=hard, 2=symlink ... 5=directory ...
	 * - char linkname[100]; Name of linked file
	 * - char magic[6]; UStar indicator "ustar"
	 * - char version[2]; UStar version "00"
	 * - char uname[32]; Owner user name
	 * - char gname[32]; Owner group name
	 * - char devmajor[8]; Device major number
	 * - char devminor[8]; Device minor number
	 * - char path[155]; Filename prefix
	 */
	private function _read_current_header()
	{
		// The fields that hold octal numbers, flipped for a quick lookup
		$octal_fields = array_flip(array('mode', 'uid', 'gid', 'size', 'mtime', 'checksum', 'type'));

		// Headers sit on 512 byte boundaries, the offset is a block count
		$block = substr($this->data, $this->_offset << 9, 512);
		$this->_header = $block;

		// Split the block in to its named fields
		$fields = unpack('a100filename/a8mode/a8uid/a8gid/a12size/a12mtime/a8checksum/a1type/a100linkname/a6magic/a2version/a32uname/a32gname/a8devmajor/a8devminor/a155path', $block);

		// Strip the padding, and turn octal strings in to numbers
		foreach ($fields as $name => $raw)
		{
			$clean = trim($raw);
			$fields[$name] = isset($octal_fields[$name]) ? octdec($clean) : $clean;
		}

		$this->_current = $fields;
	}
395
396
	/**
	 * Determines if the current archive entry is to be written or not
	 *
	 * What it does:
	 *
	 * - During a full extraction (a destination and not single file mode) parent
	 * directory references are removed from the entry name, for files as well as
	 * for directories, so nothing can be written outside of the destination
	 * - Directory entries are created during a full extraction and never "written"
	 * - File entries are written if they do not exist yet, if overwrite is on, or
	 * if the copy on disk is older than the one in the archive
	 * - The result is left in $this->_write_this
	 */
	private function _determine_write_this()
	{
		// Entries ending in a / are directories, decided on the name as it is in the archive
		$is_directory = substr($this->_current['filename'], -1) === '/';

		// Only a full extraction ever touches the disk
		$extracting = $this->destination !== null && !$this->single_file;

		// Protect from parent directory writing, repeat until nothing is left to remove
		// so removing one sequence can not assemble another, e.g. ....//
		if ($extracting)
		{
			$clean = $this->_current['filename'];
			do
			{
				$before = $clean;
				$clean = strtr($clean, array('../' => '', '/..' => ''));
			} while ($clean !== $before);

			$this->_current['filename'] = $clean;
		}

		$target = $this->destination . '/' . $this->_current['filename'];

		// Folder... create when extracting, there is never data to write for it
		if ($is_directory)
		{
			if ($extracting && !file_exists($target))
				mktree($target, 0777);

			$this->_write_this = false;

			return;
		}

		// A file name that cleaned down to nothing, or to a directory path, is not written
		if ($extracting && ($this->_current['filename'] === '' || substr($this->_current['filename'], -1) === '/'))
		{
			$this->_write_this = false;

			return;
		}

		// Not a directory and doesn't exist already...
		if (!file_exists($target))
			$this->_write_this = true;
		// File exists... check if it is newer.
		else
			$this->_write_this = $this->overwrite || filemtime($target) < $this->_current['mtime'];
	}
420
421
	/**
	 * Does the actual writing of the current file
	 *
	 * - Sets $this->_found to the file data when this is the single file being looked for
	 * - Sets $this->_skip when the file is not the one, or not one of the ones, wanted
	 * - Otherwise writes the file to the destination, provided its header checksum verifies
	 * - The header checksum is always evaluated, so $this->_crc_check is current afterwards
	 */
	private function _write_this_file()
	{
		$name = $this->_current['filename'];

		$this->_skip = false;
		$this->_found = false;

		// The file lives in a sub directory, make sure it is there
		if (strpos($name, '/') !== false && !$this->single_file)
		{
			mktree($this->destination . '/' . dirname($name), 0777);
		}

		if ($this->single_file)
		{
			// In this mode the destination is the name of the file to find
			$wanted = $this->destination === $name || $this->destination === '*/' . basename($name);

			if ($wanted)
			{
				$this->_found = $this->_current['data'];
			}
			else
			{
				$this->_skip = true;
			}
		}
		// Only some files were asked for and this is not one of them
		elseif ($this->files_to_extract !== null && !in_array($name, $this->files_to_extract))
		{
			$this->_skip = true;
		}

		// Validate the header, then write the file unless it was skipped or is being returned
		$header_ok = $this->_check_header_crc();
		if ($header_ok && $this->_skip === false && $this->_found === false)
		{
			package_put_contents($this->destination . '/' . $name, $this->_current['data']);
		}
	}
450
451
	/**
	 * Compares the crc32 saved in the archive with the one calculated from the inflated data
	 *
	 * - Replaces $this->_crc with its zero padded, 8 character hex form
	 *
	 * @return boolean false if the inflate failed or the values differ
	 */
	private function _check_crc()
	{
		// hash() gives us the unsigned crc as padded hex, put the saved one in the same form
		$calculated = hash('crc32b', $this->data);
		$this->_crc = str_pad(dechex($this->_crc), 8, '0', STR_PAD_LEFT);

		// Nothing inflated, nothing to compare
		if ($this->data === false)
		{
			return false;
		}

		return $this->_crc === $calculated;
	}
462
463
	/**
	 * Checks the checksum saved in the current tar header against a calculated one
	 *
	 * - Sums every byte of the 512 byte header except the checksum field itself
	 * (bytes 148 to 155), which is counted as eight spaces, 8 * 32 = 256
	 * - The sum is left in $this->_crc and the outcome in $this->_crc_check
	 *
	 * @return boolean
	 */
	private function _check_header_crc()
	{
		// The checksum field counts as eight spaces
		$sum = 256;

		// Add up the rest of the header, stepping over the checksum field
		for ($position = 0; $position < 512; $position++)
		{
			if ($position >= 148 && $position < 156)
			{
				continue;
			}

			$sum += ord($this->_header[$position]);
		}

		$this->_crc = $sum;
		$this->_crc_check = $this->_current['checksum'] === $sum;

		return $this->_crc_check;
	}
480
}
481