GitHub Access Token became invalid

It seems that the GitHub access token used to retrieve details about this repository from GitHub has become invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to renew the access token on this website.
Completed
Pull Request — master (#1192)
by
unknown
14:48
created

TCPDF_PARSER::getXrefData()   F

Complexity

Conditions 15
Paths 1340

Size

Total Lines 69
Code Lines 43

Duplication

Lines 9
Ratio 13.04 %

Importance

Changes 0
Metric Value
cc 15
eloc 43
nc 1340
nop 2
dl 9
loc 69
rs 2.5316
c 0
b 0
f 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
//============================================================+
3
// File name   : tcpdf_parser.php
4
// Version     : 1.0.000
5
// Begin       : 2011-05-23
6
// Last Update : 2011-07-14
7
// Author      : Nicola Asuni - Tecnick.com S.r.l - Via Della Pace, 11 - 09044 - Quartucciu (CA) - ITALY - www.tecnick.com - [email protected]
8
// License     : http://www.tecnick.com/pagefiles/tcpdf/LICENSE.TXT GNU-LGPLv3 + YOU CAN'T REMOVE ANY TCPDF COPYRIGHT NOTICE OR LINK FROM THE GENERATED PDF DOCUMENTS.
9
// -------------------------------------------------------------------
10
// Copyright (C) 2011-2011  Nicola Asuni - Tecnick.com S.r.l.
11
//
12
// This file is part of TCPDF software library.
13
//
14
// TCPDF is free software: you can redistribute it and/or modify it
15
// under the terms of the GNU Lesser General Public License as
16
// published by the Free Software Foundation, either version 3 of the
17
// License, or (at your option) any later version. Additionally,
18
// YOU CAN'T REMOVE ANY TCPDF COPYRIGHT NOTICE OR LINK FROM THE
19
// GENERATED PDF DOCUMENTS.
20
//
21
// TCPDF is distributed in the hope that it will be useful, but
22
// WITHOUT ANY WARRANTY; without even the implied warranty of
23
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
24
// See the GNU Lesser General Public License for more details.
25
//
26
// You should have received a copy of the License
27
// along with TCPDF. If not, see
28
// <http://www.tecnick.com/pagefiles/tcpdf/LICENSE.TXT>.
29
//
30
// See LICENSE.TXT file for more information.
31
// -------------------------------------------------------------------
32
//
33
// Description : This is a PHP class for parsing PDF documents.
34
//
35
//============================================================+
36
37
/**
38
 * @file
39
 * This is a PHP class for parsing PDF documents.<br>
40
 * @package com.tecnick.tcpdf
41
 * @author Nicola Asuni
42
 * @version 1.0.000
43
 */
44
45
// include class for decoding filters
46
require_once(dirname(__FILE__).'/tcpdf_filters.php');
47
48
/**
49
 * @class TCPDF_PARSER
50
 * This is a PHP class for parsing PDF documents.<br>
51
 * @package com.tecnick.tcpdf
52
 * @brief This is a PHP class for parsing PDF documents.
53
 * @version 1.0.000
54
 * @author Nicola Asuni - [email protected]
55
 */
56
class TCPDF_PARSER {
57
58
	/**
59
	 * Raw content of the PDF document.
60
	 * @private
61
	 */
62
	private $pdfdata = '';
63
64
	/**
65
	 * XREF data.
66
	 * @protected
67
	 */
68
	protected $xref = array();
69
70
	/**
71
	 * Array of PDF objects.
72
	 * @protected
73
	 */
74
	protected $objects = array();
75
76
	/**
77
	 * Class object for decoding filters.
78
	 * @private
79
	 */
80
	private $FilterDecoders;
81
82
// -----------------------------------------------------------------------------
83
84
	/**
	 * Parse a PDF document and build the array of its objects.
	 *
	 * The raw data is only kept while parsing: once the cross-reference data
	 * has been read and every object it lists has been loaded, the internal
	 * copy of the document is emptied.
	 * @param $data (string) PDF data to parse.
	 * @public
	 * @since 1.0.000 (2011-05-24)
	 */
	public function __construct($data) {
		if (empty($data)) {
			$this->Error('Empty PDF data.');
		}
		$this->pdfdata = $data;
		// initialize class for decoding filters
		$this->FilterDecoders = new TCPDF_FILTERS();
		// get xref and trailer data
		$this->xref = $this->getXrefData();
		// the 'xref' key is only created when at least one in-use entry was found
		$offsets = (is_array($this->xref) AND isset($this->xref['xref'])) ? $this->xref['xref'] : array();
		// parse all document objects
		$this->objects = array();
		foreach ($offsets as $obj => $offset) {
			if (!isset($this->objects[$obj])) {
				$this->objects[$obj] = $this->getIndirectObject($obj, $offset, true);
			}
		}
		// release some memory
		$this->pdfdata = '';
	}
112
113
	/**
	 * Give access to the result of the parsing.
	 * @return (array) Two-element array: the xref/trailer data at index 0 and the parsed objects at index 1.
	 * @public
	 * @since 1.0.000 (2011-06-26)
	 */
	public function getParsedData() {
		$parsed = array();
		$parsed[] = $this->xref;
		$parsed[] = $this->objects;
		return $parsed;
	}
122
123
	/**
	 * Get xref (cross-reference table) and trailer data from PDF document data.
	 *
	 * The most recent cross-reference section is read first; every "Prev"
	 * entry found in a trailer is then followed, so that sections written by
	 * earlier revisions of the document are merged in. Entries and trailer
	 * values already collected are never overwritten, which keeps the most
	 * recent definition of each object.
	 * @param $offset (int) Byte offset of an "xref" keyword (if known); any other value is the position from which the last "startxref" is searched.
	 * @param $xref (array) previous xref array (if any).
	 * @return Array containing xref and trailer data.
	 * @protected
	 * @since 1.0.000 (2011-05-24)
	 */
	protected function getXrefData($offset=0, $xref=array()) {
		$startxref = $this->findStartXref(intval($offset));
		// remember the sections already read: a malformed "Prev" chain must not loop forever
		$visited = array();
		while (($startxref !== null) AND !isset($visited[$startxref])) {
			$visited[$startxref] = true;
			$startxref = $this->parseXrefSection($startxref, $xref);
		}
		return $xref;
	}

	/**
	 * Find the byte offset of the cross-reference section to start from.
	 * @param $offset (int) Offset of an "xref" keyword, or position from which "startxref" is searched.
	 * @return (int|null) Offset of the cross-reference section, or null when it cannot be found.
	 * @protected
	 */
	protected function findStartXref($offset) {
		if (($offset > 0) AND (substr($this->pdfdata, $offset, 4) === 'xref')) {
			// the caller already points at a cross-reference section
			return $offset;
		}
		// find last startxref
		$matches = array();
		if (preg_match_all('/[\r\n]startxref[\s]*[\r\n]+([0-9]+)[\s]*[\r\n]+%%EOF/i', $this->pdfdata, $matches, PREG_SET_ORDER, $offset) == 0) {
			$this->Error('Unable to find startxref');
			return null;
		}
		$last = array_pop($matches);
		return intval($last[1]);
	}

	/**
	 * Read one cross-reference section and its trailer, merging them into $xref.
	 * @param $startxref (int) Byte offset of the "xref" keyword.
	 * @param $xref (array) Array receiving the object offsets ('xref') and trailer values ('trailer'); modified in place.
	 * @return (int|null) Offset of the previous cross-reference section ("Prev" trailer entry), or null if there is none.
	 * @protected
	 */
	protected function parseXrefSection($startxref, &$xref) {
		// check xref position
		if (substr($this->pdfdata, $startxref, 4) !== 'xref') {
			$this->Error('Unable to find xref');
			return null;
		}
		// skip the "xref" keyword and the end-of-line character after it
		$offset = min(($startxref + 5), strlen($this->pdfdata));
		// the entries of this section stop where its trailer begins
		$trailer_pos = stripos($this->pdfdata, 'trailer', $offset);
		if ($trailer_pos === false) {
			$this->Error('Unable to find trailer');
			return null;
		}
		// extract xref data (object indexes and offsets)
		$obj_num = 0;
		$matches = array();
		while (preg_match('/^([0-9]+)[\s]([0-9]+)[\s]?([nf]?)/im', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset) > 0) {
			if ($matches[0][1] >= $trailer_pos) {
				// this line belongs to whatever follows the section, not to the table
				break;
			}
			$offset = (strlen($matches[0][0]) + $matches[0][1]);
			if ($matches[3][0] == 'n') {
				// create unique object index: [object number]_[generation number]
				$index = $obj_num.'_'.intval($matches[2][0]);
				// keep the offset found first (most recent section)
				if (!isset($xref['xref'][$index])) {
					$xref['xref'][$index] = intval($matches[1][0]);
				}
				++$obj_num;
				$offset += 2;
			} elseif ($matches[3][0] == 'f') {
				// free entry: only the object counter advances
				++$obj_num;
				$offset += 2;
			} else {
				// subsection header: first object number of the subsection
				$obj_num = intval($matches[1][0]);
			}
		}
		// get trailer data
		if (preg_match('/trailer[\s]*<<(.*)>>[\s]*[\r\n]+startxref[\s]*[\r\n]+/isU', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset) == 0) {
			$this->Error('Unable to find trailer');
			return null;
		}
		$trailer_data = $matches[1][0];
		if (!isset($xref['trailer'])) {
			// get only the last updated version
			$xref['trailer'] = $this->parseTrailerData($trailer_data);
		}
		if (preg_match('/Prev[\s]+([0-9]+)/i', $trailer_data, $matches) > 0) {
			// offset of the previous cross-reference section
			return intval($matches[1]);
		}
		return null;
	}

	/**
	 * Extract the supported entries (Size, Root, Encrypt, Info, ID) from the content of a trailer dictionary.
	 * @param $trailer_data (string) Text found between the "<<" and ">>" of the trailer.
	 * @return (array) Trailer values indexed by 'size', 'root', 'encrypt', 'info' and 'id' (only the entries that were found).
	 * @protected
	 */
	protected function parseTrailerData($trailer_data) {
		$trailer = array();
		$matches = array();
		if (preg_match('/Size[\s]+([0-9]+)/i', $trailer_data, $matches) > 0) {
			$trailer['size'] = intval($matches[1]);
		}
		// entries holding an indirect reference are stored as [object number]_[generation number]
		foreach (array('Root' => 'root', 'Encrypt' => 'encrypt', 'Info' => 'info') as $key => $name) {
			if (preg_match('/'.$key.'[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
				$trailer[$name] = intval($matches[1]).'_'.intval($matches[2]);
			}
		}
		if (preg_match('/ID[\s]*[\[][\s]*[<]([^>]*)[>][\s]*[<]([^>]*)[>]/i', $trailer_data, $matches) > 0) {
			$trailer['id'] = array();
			$trailer['id'][0] = $matches[1];
			$trailer['id'][1] = $matches[2];
		}
		return $trailer;
	}
200
201
	/**
	 * Get object type, raw value and offset to next object
	 *
	 * The returned type is one of: '/' (name), '(' or ')' (literal string
	 * delimiters), '[' or ']' (array delimiters), '<<' or '>>' (dictionary
	 * delimiters), '<' or '>' (hexadecimal string delimiters), 'endobj',
	 * 'null', 'boolean', 'stream', 'endstream', 'objref', 'obj', 'numeric',
	 * or an empty string when nothing could be recognised at this position
	 * (including the end of the data).
	 * @param $offset (int) Object offset.
	 * @return array containing object type, raw value and offset to next object
	 * @protected
	 * @since 1.0.000 (2011-06-20)
	 */
	protected function getRawObject($offset=0) {
		$objtype = ''; // object type to be returned
		$objval = ''; // object value to be returned
		// skip initial white space chars: \x00 null (NUL), \x09 horizontal tab (HT), \x0A line feed (LF), \x0C form feed (FF), \x0D carriage return (CR), \x20 space (SP)
		$offset += strspn($this->pdfdata, "\x00\x09\x0a\x0c\x0d\x20", $offset);
		if (!isset($this->pdfdata[$offset])) {
			// end of data: there is nothing left to read
			return array($objtype, $objval, $offset);
		}
		// get first char
		$char = $this->pdfdata[$offset];
		// get object type
		switch ($char) {
			case '%': { // \x25 PERCENT SIGN
				// skip the comment (up to the end of the line) and return the next token
				$next = strcspn($this->pdfdata, "\r\n", $offset);
				if ($next > 0) {
					return $this->getRawObject($offset + $next);
				}
				break;
			}
			case '/': { // \x2F SOLIDUS
				// name object
				$objtype = $char;
				++$offset;
				$matches = array();
				if (preg_match('/^([^\x00\x09\x0a\x0c\x0d\x20\s\x28\x29\x3c\x3e\x5b\x5d\x7b\x7d\x2f\x25]+)/', substr($this->pdfdata, $offset, 256), $matches) == 1) {
					$objval = $matches[1]; // unescaped value
					$offset += strlen($objval);
				}
				break;
			}
			case '(':   // \x28 LEFT PARENTHESIS
			case ')': { // \x29 RIGHT PARENTHESIS
				// literal string object
				$objtype = $char;
				++$offset;
				if ($char == '(') {
					// look for the matching parenthesis, honouring nesting and escapes
					$strpos = $offset;
					$open_bracket = 1;
					while (($open_bracket > 0) AND isset($this->pdfdata[$strpos])) {
						switch ($this->pdfdata[$strpos]) {
							case '\\': { // REVERSE SOLIDUS (5Ch) (Backslash)
								// skip next character
								++$strpos;
								break;
							}
							case '(': { // LEFT PARENTHESIS (28h)
								++$open_bracket;
								break;
							}
							case ')': { // RIGHT PARENTHESIS (29h)
								--$open_bracket;
								break;
							}
						}
						++$strpos;
					}
					$objval = substr($this->pdfdata, $offset, ($strpos - $offset - 1));
					$offset = $strpos;
				}
				break;
			}
			case '[':   // \x5B LEFT SQUARE BRACKET
			case ']': { // \x5D RIGHT SQUARE BRACKET
				// array object
				$objtype = $char;
				++$offset;
				if ($char == '[') {
					// get array content
					list($objval, $offset) = $this->getRawObjectList($offset, ']');
				}
				break;
			}
			case '<':   // \x3C LESS-THAN SIGN
			case '>': { // \x3E GREATER-THAN SIGN
				if (isset($this->pdfdata[($offset + 1)]) AND ($this->pdfdata[($offset + 1)] == $char)) {
					// dictionary object
					$objtype = $char.$char;
					$offset += 2;
					if ($char == '<') {
						// get dictionary content (flat list of keys and values)
						list($objval, $offset) = $this->getRawObjectList($offset, '>>');
					}
				} else {
					// hexadecimal string object
					$objtype = $char;
					++$offset;
					$matches = array();
					if (($char == '<') AND (preg_match('/^([0-9A-Fa-f]+)[>]/iU', substr($this->pdfdata, $offset), $matches) == 1)) {
						$objval = $matches[1];
						$offset += strlen($matches[0]);
					}
				}
				break;
			}
			default: {
				$matches = array();
				if (substr($this->pdfdata, $offset, 6) == 'endobj') {
					// indirect object
					$objtype = 'endobj';
					$offset += 6;
				} elseif (substr($this->pdfdata, $offset, 4) == 'null') {
					// null object
					$objtype = 'null';
					$offset += 4;
					$objval = 'null';
				} elseif (substr($this->pdfdata, $offset, 4) == 'true') {
					// boolean true object
					$objtype = 'boolean';
					$offset += 4;
					$objval = 'true';
				} elseif (substr($this->pdfdata, $offset, 5) == 'false') {
					// boolean false object
					$objtype = 'boolean';
					$offset += 5;
					$objval = 'false';
				} elseif (substr($this->pdfdata, $offset, 6) == 'stream') {
					// start stream object
					$objtype = 'stream';
					$offset += 6;
					if (preg_match('/^[\r\n]+(.*)[\r\n]*endstream/isU', substr($this->pdfdata, $offset), $matches) == 1) {
						$objval = $matches[1];
						$offset += strlen($matches[0]);
					}
				} elseif (substr($this->pdfdata, $offset, 9) == 'endstream') {
					// end stream object
					$objtype = 'endstream';
					$offset += 9;
				} elseif (preg_match('/^([0-9]+)[\s]+([0-9]+)[\s]+R/iU', substr($this->pdfdata, $offset, 33), $matches) == 1) {
					// indirect object reference
					$objtype = 'objref';
					$offset += strlen($matches[0]);
					$objval = intval($matches[1]).'_'.intval($matches[2]);
				} elseif (preg_match('/^([0-9]+)[\s]+([0-9]+)[\s]+obj/iU', substr($this->pdfdata, $offset, 33), $matches) == 1) {
					// object start
					$objtype = 'obj';
					$objval = intval($matches[1]).'_'.intval($matches[2]);
					$offset += strlen($matches[0]);
				} elseif (($numlen = strspn($this->pdfdata, '+-.0123456789', $offset)) > 0) {
					// numeric object
					$objtype = 'numeric';
					$objval = substr($this->pdfdata, $offset, $numlen);
					$offset += $numlen;
				}
				break;
			}
		}
		return array($objtype, $objval, $offset);
	}

	/**
	 * Read consecutive raw objects until a closing delimiter is met.
	 *
	 * Reading also stops on the first unrecognised token (empty type), because
	 * the offset no longer advances from there and the loop would never end.
	 * @param $offset (int) Offset of the first element.
	 * @param $closing (string) Type of the closing delimiter (']' or '>>').
	 * @return array containing the list of elements (closing delimiter excluded) and the offset following the last element read.
	 * @protected
	 */
	protected function getRawObjectList($offset, $closing) {
		$items = array();
		while (true) {
			// get element
			$element = $this->getRawObject($offset);
			$offset = $element[2];
			if (($element[0] === $closing) OR ($element[0] === '')) {
				break;
			}
			$items[] = $element;
		}
		return array($items, $offset);
	}
372
373
	/**
	 * Get content of indirect object.
	 *
	 * The object is expected to start exactly at $offset with the
	 * "[number] [generation] obj" header; otherwise the null object is returned.
	 * Elements are read until "endobj" (or until nothing more can be
	 * recognised, so that a truncated object cannot block the parser).
	 * @param $obj_ref (string) Object number and generation number separated by underscore character.
	 * @param $offset (int) Object offset.
	 * @param $decoding (boolean) If true decode streams.
	 * @return array containing object data.
	 * @protected
	 * @since 1.0.000 (2011-05-24)
	 */
	protected function getIndirectObject($obj_ref, $offset=0, $decoding=true) {
		$obj = explode('_', $obj_ref);
		if (count($obj) != 2) {
			$this->Error('Invalid object reference: '.$obj_ref);
			return;
		}
		$objref = $obj[0].' '.$obj[1].' obj';
		if (substr($this->pdfdata, $offset, strlen($objref)) !== $objref) {
			// an indirect reference to an undefined object shall be considered a reference to the null object
			return array('null', 'null', $offset);
		}
		// starting position of object content
		$offset += strlen($objref);
		// get array of object content
		$objdata = array();
		while (true) {
			// get element
			$element = $this->getRawObject($offset);
			$offset = $element[2];
			if (($element[0] == 'endobj') OR ($element[0] === '')) {
				// closing delimiter, or unreadable data: stop here
				break;
			}
			// decode stream using stream's dictionary information (the element read just before)
			$previous = end($objdata);
			if ($decoding AND ($element[0] == 'stream') AND ($previous !== false) AND ($previous[0] == '<<')) {
				$element[3] = $this->decodeStream($previous[1], $element[1]);
			}
			$objdata[] = $element;
		}
		// return raw object content
		return $objdata;
	}
414
415
	/**
	 * Get the content of object, resolving indirect object reference if necessary.
	 *
	 * For an indirect reference the result is the content of the referenced
	 * object (a list of elements, as returned by getIndirectObject()); any
	 * other element, or a reference that cannot be resolved, is returned
	 * unchanged.
	 * @param $obj (array) Element as returned by getRawObject(): type, value and offset.
	 * @return array containing object data.
	 * @protected
	 * @since 1.0.000 (2011-06-26)
	 */
	protected function getObjectVal($obj) {
		// 'ojbref' is a misspelled type name that may still be found in parsed data
		if (($obj[0] == 'objref') OR ($obj[0] == 'ojbref')) {
			$key = $obj[1];
			if (isset($this->objects[$key])) {
				// this object has been already parsed
				return $this->objects[$key];
			}
			// object offsets are stored in the 'xref' entry of the xref data
			if (isset($this->xref['xref'][$key])) {
				// parse new object
				$this->objects[$key] = $this->getIndirectObject($key, $this->xref['xref'][$key], false);
				return $this->objects[$key];
			}
		}
		return $obj;
	}
436
437
	/**
	 * Decode the specified stream.
	 *
	 * The dictionary is scanned for a numeric "Length" entry (the stream is
	 * cut to that length when it is shorter than the data received) and for
	 * a "Filter" entry (a single name or an array of names). Filters are
	 * applied in order; those the filter decoder does not provide are
	 * returned so that the caller knows the data is still encoded.
	 * @param $sdic (array) Stream's dictionary array.
	 * @param $stream (string) Stream to decode.
	 * @return array containing decoded stream data and remaining filters.
	 * @protected
	 * @since 1.0.000 (2011-06-22)
	 */
	protected function decodeStream($sdic, $stream) {
		// get stream length and filters
		$slength = strlen($stream);
		$filters = array();
		foreach ($sdic as $k => $v) {
			// only name elements can be dictionary keys; the value is the next element
			if (($v[0] != '/') OR !isset($sdic[($k + 1)])) {
				continue;
			}
			$next = $sdic[($k + 1)];
			if ($v[1] == 'Length') {
				if ($next[0] == 'numeric') {
					// get declared stream length
					$declength = intval($next[1]);
					if ($declength < $slength) {
						$stream = substr($stream, 0, $declength);
						$slength = $declength;
					}
				}
			} elseif ($v[1] == 'Filter') {
				// resolve indirect object
				$objval = $this->getObjectVal($next);
				if (isset($objval[0]) AND is_array($objval[0])) {
					// a resolved indirect object is a list of elements: the filter is its first element
					$objval = $objval[0];
				}
				if (!isset($objval[0])) {
					continue;
				}
				if ($objval[0] == '/') {
					// single filter
					$filters[] = $objval[1];
				} elseif ($objval[0] == '[') {
					// array of filters
					foreach ($objval[1] as $flt) {
						if ($flt[0] == '/') {
							$filters[] = $flt[1];
						}
					}
				}
			}
		}
		// decode the stream
		$remaining_filters = array();
		// ask for the supported filters only once, and only when there is something to decode
		$available = empty($filters) ? array() : $this->FilterDecoders->getAvailableFilters();
		foreach ($filters as $filter) {
			if (in_array($filter, $available)) {
				$stream = $this->FilterDecoders->decodeFilter($filter, $stream);
			} else {
				// add missing filter to array
				$remaining_filters[] = $filter;
			}
		}
		return array($stream, $remaining_filters);
	}
487
488
	/**
	 * Fatal error handler: prints the given message, prefixed with the name of the class, and stops the script.
	 * @param $msg (string) The error message
	 * @public
	 * @since 1.0.000 (2011-05-23)
	 */
	public function Error($msg) {
		// build the text to print, then terminate the execution with it
		$output = '<strong>TCPDF_PARSER ERROR: </strong>'.$msg;
		exit($output);
	}
498
499
} // END OF TCPDF_PARSER CLASS
500
501
//============================================================+
502
// END OF FILE
503
//============================================================+
504