1
|
|
|
<?php |
2
|
|
|
//============================================================+ |
3
|
|
|
// File name : tcpdf_parser.php |
4
|
|
|
// Version : 1.0.014 |
5
|
|
|
// Begin : 2011-05-23 |
6
|
|
|
// Last Update : 2014-02-18 |
7
|
|
|
// Author : Nicola Asuni - Tecnick.com LTD - www.tecnick.com - [email protected] |
8
|
|
|
// License : http://www.tecnick.com/pagefiles/tcpdf/LICENSE.TXT GNU-LGPLv3 |
9
|
|
|
// ------------------------------------------------------------------- |
10
|
|
|
// Copyright (C) 2011-2014 Nicola Asuni - Tecnick.com LTD |
11
|
|
|
// |
12
|
|
|
// This file is part of TCPDF software library. |
13
|
|
|
// |
14
|
|
|
// TCPDF is free software: you can redistribute it and/or modify it |
15
|
|
|
// under the terms of the GNU Lesser General Public License as |
16
|
|
|
// published by the Free Software Foundation, either version 3 of the |
17
|
|
|
// License, or (at your option) any later version. |
18
|
|
|
// |
19
|
|
|
// TCPDF is distributed in the hope that it will be useful, but |
20
|
|
|
// WITHOUT ANY WARRANTY; without even the implied warranty of |
21
|
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
22
|
|
|
// See the GNU Lesser General Public License for more details. |
23
|
|
|
// |
24
|
|
|
// You should have received a copy of the License |
25
|
|
|
// along with TCPDF. If not, see |
26
|
|
|
// <http://www.tecnick.com/pagefiles/tcpdf/LICENSE.TXT>. |
27
|
|
|
// |
28
|
|
|
// See LICENSE.TXT file for more information. |
29
|
|
|
// ------------------------------------------------------------------- |
30
|
|
|
// |
31
|
|
|
// Description : This is a PHP class for parsing PDF documents. |
32
|
|
|
// |
33
|
|
|
//============================================================+ |
34
|
|
|
|
35
|
|
|
/** |
36
|
|
|
* @file |
37
|
|
|
* This is a PHP class for parsing PDF documents.<br> |
38
|
|
|
* @package com.tecnick.tcpdf |
39
|
|
|
* @author Nicola Asuni |
40
|
|
|
* @version 1.0.014 |
41
|
|
|
*/ |
42
|
|
|
|
43
|
|
|
// include class for decoding filters |
44
|
|
|
require_once(dirname(__FILE__).'/include/tcpdf_filters.php'); |
45
|
|
|
|
46
|
|
|
/**
 * @class TCPDF_PARSER
 * This is a PHP class for parsing PDF documents.<br>
 * The whole document is parsed by the constructor; the result (cross-reference
 * data and parsed objects) is then available through getParsedData().
 * @package com.tecnick.tcpdf
 * @brief This is a PHP class for parsing PDF documents.
 * @version 1.0.010
 * @author Nicola Asuni - info@tecnick.com
 */
class TCPDF_PARSER {

	/**
	 * Raw content of the PDF document (emptied at the end of the constructor).
	 * @private
	 */
	private $pdfdata = '';

	/**
	 * XREF data: array with the keys 'xref' (object index => byte offset) and 'trailer'.
	 * @protected
	 */
	protected $xref = array();

	/**
	 * Array of PDF objects, indexed by "[object number]_[generation number]".
	 * @protected
	 */
	protected $objects = array();

	/**
	 * Class object for decoding filters.
	 * @private
	 */
	private $FilterDecoders;

	/**
	 * Array of configuration parameters.
	 * @private
	 */
	private $cfg = array(
		'die_for_errors' => false,
		'ignore_filter_decoding_errors' => true,
		'ignore_missing_filter_decoders' => true,
	);

	/**
	 * Offsets of the cross-reference sections already decoded.
	 * Used to stop following /Prev chains that point back to a visited section.
	 * @private
	 */
	private $visited_xref = array();

// -----------------------------------------------------------------------------

	/**
	 * Parse a PDF document and store the resulting array of objects.
	 * @param $data (string) PDF data to parse.
	 * @param $cfg (array) Array of configuration parameters:
	 *			'die_for_errors' : if true terminate the program execution in case of error, otherwise throw an exception;
	 *			'ignore_filter_decoding_errors' : if true ignore filter decoding errors;
	 *			'ignore_missing_filter_decoders' : if true ignore missing filter decoding errors.
	 * @public
	 * @since 1.0.000 (2011-05-24)
	 */
	public function __construct($data, $cfg=array()) {
		// set configuration parameters first, so that 'die_for_errors' is
		// honoured by every Error() call below
		$this->setConfig($cfg);
		if (empty($data)) {
			$this->Error('Empty PDF data.');
		}
		// find the pdf header starting position
		if (($trimpos = strpos($data, '%PDF-')) === FALSE) {
			$this->Error('Invalid PDF data: missing %PDF header.');
		}
		// get PDF content string (anything before the header is discarded)
		$this->pdfdata = substr($data, $trimpos);
		// get xref and trailer data
		$this->visited_xref = array();
		$this->xref = $this->getXrefData();
		// parse all document objects
		$this->objects = array();
		if (isset($this->xref['xref']) && is_array($this->xref['xref'])) {
			foreach ($this->xref['xref'] as $obj => $offset) {
				if (!isset($this->objects[$obj]) && ($offset > 0)) {
					// decode objects with positive offset
					// (compressed objects are stored with offset -1 and are skipped)
					$this->objects[$obj] = $this->getIndirectObject($obj, $offset, true);
				}
			}
		}
		// release some memory
		$this->pdfdata = '';
	}

	/**
	 * Set the configuration parameters.
	 * Only the known keys are read; each value is converted to boolean.
	 * @param $cfg (array) Array of configuration parameters:
	 *			'die_for_errors' : if true terminate the program execution in case of error, otherwise throw an exception;
	 *			'ignore_filter_decoding_errors' : if true ignore filter decoding errors;
	 *			'ignore_missing_filter_decoders' : if true ignore missing filter decoding errors.
	 * @protected
	 */
	protected function setConfig($cfg) {
		$keys = array('die_for_errors', 'ignore_filter_decoding_errors', 'ignore_missing_filter_decoders');
		foreach ($keys as $key) {
			if (isset($cfg[$key])) {
				$this->cfg[$key] = (bool) $cfg[$key];
			}
		}
	}

	/**
	 * Return an array of parsed PDF document objects.
	 * @return (array) Two-element array: xref/trailer data and the array of parsed objects.
	 * @public
	 * @since 1.0.000 (2011-06-26)
	 */
	public function getParsedData() {
		return array($this->xref, $this->objects);
	}

	/**
	 * Get Cross-Reference (xref) table and trailer data from PDF document data.
	 * @param $offset (int) xref offset (if known); 0 means "use the last startxref of the document".
	 * @param $xref (array) previous xref array (if any).
	 * @return Array containing xref and trailer data.
	 * @protected
	 * @since 1.0.000 (2011-05-24)
	 */
	protected function getXrefData($offset=0, $xref=array()) {
		$offset = (int) $offset;
		$pdflen = strlen($this->pdfdata);
		$startxref_pattern = '/[\r\n]startxref[\s]*[\r\n]+([0-9]+)[\s]*[\r\n]+%%EOF/i';
		if ($offset == 0) {
			// find last startxref
			if (preg_match_all($startxref_pattern, $this->pdfdata, $matches, PREG_SET_ORDER, $offset) == 0) {
				$this->Error('Unable to find startxref');
			}
			$matches = array_pop($matches);
			$startxref = $matches[1];
		} elseif (($offset < 0) || ($offset >= $pdflen)) {
			// an offset outside the document cannot point to anything
			$this->Error('Unable to find startxref');
		} elseif (strpos($this->pdfdata, 'xref', $offset) === $offset) {
			// Already pointing at the xref table
			$startxref = $offset;
		} elseif (preg_match('/([0-9]+[\s][0-9]+[\s]obj)/i', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset)) {
			// Cross-Reference Stream object
			$startxref = $offset;
		} elseif (preg_match($startxref_pattern, $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset)) {
			// startxref found
			$startxref = $matches[1][0];
		} else {
			$this->Error('Unable to find startxref');
		}
		$startxref = intval($startxref);
		if ($startxref >= $pdflen) {
			$this->Error('Unable to find xref');
		}
		if (isset($this->visited_xref[$startxref])) {
			// this section has already been decoded: a /Prev chain is looping
			return $xref;
		}
		$this->visited_xref[$startxref] = true;
		// check xref position (strict comparison: strpos returns false when not found)
		if (strpos($this->pdfdata, 'xref', $startxref) === $startxref) {
			// Cross-Reference
			$xref = $this->decodeXref($startxref, $xref);
		} else {
			// Cross-Reference Stream
			$xref = $this->decodeXrefStream($startxref, $xref);
		}
		if (empty($xref)) {
			$this->Error('Unable to find xref');
		}
		return $xref;
	}

	/**
	 * Decode the Cross-Reference section
	 * @param $startxref (int) Offset at which the xref section starts (position of the 'xref' keyword).
	 * @param $xref (array) Previous xref array (if any).
	 * @return Array containing xref and trailer data.
	 * @protected
	 * @since 1.0.000 (2011-06-20)
	 */
	protected function decodeXref($startxref, $xref=array()) {
		$startxref += 4; // 4 is the length of the word 'xref'
		// skip initial white space chars: \x00 null (NUL), \x09 horizontal tab (HT), \x0A line feed (LF), \x0C form feed (FF), \x0D carriage return (CR), \x20 space (SP)
		$offset = $startxref + strspn($this->pdfdata, "\x00\x09\x0a\x0c\x0d\x20", $startxref);
		// initialize object number
		$obj_num = 0;
		// search for cross-reference entries or subsection
		while (preg_match('/([0-9]+)[\x20]([0-9]+)[\x20]?([nf]?)(\r\n|[\x20]?[\r\n])/', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset) > 0) {
			if ($matches[0][1] != $offset) {
				// we are on another section
				break;
			}
			$offset += strlen($matches[0][0]);
			if ($matches[3][0] == 'n') {
				// create unique object index: [object number]_[generation number]
				$index = $obj_num.'_'.intval($matches[2][0]);
				// check if object already exist (entries decoded first take precedence)
				if (!isset($xref['xref'][$index])) {
					// store object offset position
					$xref['xref'][$index] = intval($matches[1][0]);
				}
				++$obj_num;
			} elseif ($matches[3][0] == 'f') {
				// free entry: only advances the object number
				++$obj_num;
			} else {
				// subsection header: first number is the object number (index)
				$obj_num = intval($matches[1][0]);
			}
		}
		// get trailer data
		if (preg_match('/trailer[\s]*<<(.*)>>/isU', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset) > 0) {
			$trailer_data = $matches[1][0];
			if (empty($xref['trailer'])) {
				// get only the last updated version
				$xref['trailer'] = array();
				// parse trailer_data
				if (preg_match('/Size[\s]+([0-9]+)/i', $trailer_data, $matches) > 0) {
					$xref['trailer']['size'] = intval($matches[1]);
				}
				if (preg_match('/Root[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
					$xref['trailer']['root'] = intval($matches[1]).'_'.intval($matches[2]);
				}
				if (preg_match('/Encrypt[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
					$xref['trailer']['encrypt'] = intval($matches[1]).'_'.intval($matches[2]);
				}
				if (preg_match('/Info[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
					$xref['trailer']['info'] = intval($matches[1]).'_'.intval($matches[2]);
				}
				if (preg_match('/ID[\s]*[\[][\s]*[<]([^>]*)[>][\s]*[<]([^>]*)[>]/i', $trailer_data, $matches) > 0) {
					$xref['trailer']['id'] = array();
					$xref['trailer']['id'][0] = $matches[1];
					$xref['trailer']['id'][1] = $matches[2];
				}
			}
			if (preg_match('/Prev[\s]+([0-9]+)/i', $trailer_data, $matches) > 0) {
				// get previous xref
				$xref = $this->getXrefData(intval($matches[1]), $xref);
			}
		} else {
			$this->Error('Unable to find trailer');
		}
		return $xref;
	}

	/**
	 * Decode the Cross-Reference Stream section
	 * Streams with a PNG predictor (/DecodeParms with /Columns) and streams
	 * without predictor are supported, as well as /Index arrays with several
	 * subsections.
	 * @param $startxref (int) Offset at which the xref section starts.
	 * @param $xref (array) Previous xref array (if any).
	 * @return Array containing xref and trailer data.
	 * @protected
	 * @since 1.0.003 (2013-03-16)
	 */
	protected function decodeXrefStream($startxref, $xref=array()) {
		// try to read Cross-Reference Stream
		$xrefobj = $this->getRawObject($startxref);
		$xrefcrs = $this->getIndirectObject($xrefobj[1], $startxref, true);
		if (empty($xref['trailer'])) {
			// get only the last updated version
			$xref['trailer'] = array();
			$filltrailer = true;
		} else {
			$filltrailer = false;
		}
		if (!isset($xref['xref'])) {
			$xref['xref'] = array();
		}
		$valid_crs = false;
		$columns = 0;
		$wb = null; // byte widths of the three fields (/W)
		$subsections = array(); // list of array(first object number, number of entries) from /Index
		// the stream dictionary is a flat list: key, value, key, value, ...
		$sarr = ((isset($xrefcrs[0][1]) && is_array($xrefcrs[0][1])) ? $xrefcrs[0][1] : array());
		foreach ($sarr as $k => $v) {
			if (($v[0] != '/') || !isset($sarr[($k + 1)])) {
				continue;
			}
			$next = $sarr[($k + 1)];
			switch ($v[1]) {
				case 'Type': {
					if (($next[0] == '/') && ($next[1] == 'XRef')) {
						$valid_crs = true;
					}
					break;
				}
				case 'Index': {
					if (($next[0] == '[') && is_array($next[1])) {
						// pairs of: first object number in the subsection, number of entries in the subsection
						$numvals = count($next[1]);
						for ($n = 0; ($n + 1) < $numvals; $n += 2) {
							$subsections[] = array(intval($next[1][$n][1]), intval($next[1][($n + 1)][1]));
						}
					}
					break;
				}
				case 'Prev': {
					if ($next[0] == 'numeric') {
						// get previous xref offset
						$prevxref = intval($next[1]);
					}
					break;
				}
				case 'W': {
					if (is_array($next[1]) && isset($next[1][0][1], $next[1][1][1], $next[1][2][1])) {
						// number of bytes (in the decoded stream) of the corresponding field
						$wb = array(intval($next[1][0][1]), intval($next[1][1][1]), intval($next[1][2][1]));
					}
					break;
				}
				case 'DecodeParms': {
					if (isset($next[1]) && is_array($next[1])) {
						$decpar = $next[1];
						foreach ($decpar as $kdc => $vdc) {
							if (($vdc[0] == '/') && ($vdc[1] == 'Columns') && isset($decpar[($kdc + 1)]) && ($decpar[($kdc + 1)][0] == 'numeric')) {
								$columns = intval($decpar[($kdc + 1)][1]);
							}
						}
					}
					break;
				}
				case 'Size': {
					if ($filltrailer && ($next[0] == 'numeric')) {
						$xref['trailer']['size'] = $next[1];
					}
					break;
				}
				case 'Root': {
					if ($filltrailer && ($next[0] == 'objref')) {
						$xref['trailer']['root'] = $next[1];
					}
					break;
				}
				case 'Info': {
					if ($filltrailer && ($next[0] == 'objref')) {
						$xref['trailer']['info'] = $next[1];
					}
					break;
				}
				case 'Encrypt': {
					if ($filltrailer && ($next[0] == 'objref')) {
						$xref['trailer']['encrypt'] = $next[1];
					}
					break;
				}
				case 'ID': {
					if ($filltrailer && is_array($next[1]) && isset($next[1][0][1], $next[1][1][1])) {
						$xref['trailer']['id'] = array($next[1][0][1], $next[1][1][1]);
					}
					break;
				}
			}
		}
		// decode data ($xrefcrs[1][3][0] is the stream content returned by decodeStream)
		if ($valid_crs && is_array($wb) && isset($xrefcrs[1][3][0]) && is_string($xrefcrs[1][3][0]) && ($xrefcrs[1][3][0] !== '')) {
			// convert the stream into an array of integers
			$bytes = unpack('C*', $xrefcrs[1][3][0]);
			if (!is_array($bytes)) {
				$bytes = array();
			}
			if ($columns > 0) {
				// rows are PNG-predicted: one predictor byte followed by $columns data bytes
				$ddata = $this->decodePngPredictedRows($bytes, $columns);
			} else {
				// no predictor declared: each row is simply the three fields one after the other
				$rowlen = ($wb[0] + $wb[1] + $wb[2]);
				$ddata = (($rowlen > 0) ? array_chunk($bytes, $rowlen) : array());
			}
			// complete decoding: build the three big-endian integer fields of every row
			$sdata = array();
			foreach ($ddata as $k => $row) {
				// initialize new row
				$sdata[$k] = array(0, 0, 0);
				if ($wb[0] == 0) {
					// default type field
					$sdata[$k][0] = 1;
				}
				$i = 0; // count bytes in the row
				// for every column
				for ($c = 0; $c < 3; ++$c) {
					// for every byte on the column
					for ($b = 0; $b < $wb[$c]; ++$b) {
						if (isset($row[$i])) {
							$sdata[$k][$c] += ($row[$i] << (($wb[$c] - 1 - $b) * 8));
						}
						++$i;
					}
				}
			}
			// fill xref, walking through the /Index subsections
			// (without /Index the numbering starts from 0)
			$sub = 0; // current subsection
			$used = 0; // entries already consumed in the current subsection
			$obj_num = (isset($subsections[0]) ? $subsections[0][0] : 0);
			foreach ($sdata as $row) {
				// move to the next subsection when the current one is exhausted
				while (isset($subsections[($sub + 1)]) && ($used >= $subsections[$sub][1])) {
					++$sub;
					$used = 0;
					$obj_num = $subsections[$sub][0];
				}
				switch ($row[0]) {
					case 0: { // (f) linked list of free objects
						break;
					}
					case 1: { // (n) objects that are in use but are not compressed
						// create unique object index: [object number]_[generation number]
						$index = $obj_num.'_'.$row[2];
						// check if object already exist
						if (!isset($xref['xref'][$index])) {
							// store object offset position
							$xref['xref'][$index] = $row[1];
						}
						break;
					}
					case 2: { // compressed objects
						// $row[1] = object number of the object stream in which this object is stored
						// $row[2] = index of this object within the object stream
						$index = $row[1].'_0_'.$row[2];
						$xref['xref'][$index] = -1;
						break;
					}
					default: { // null objects
						break;
					}
				}
				++$obj_num;
				++$used;
			}
		} // end decoding data
		if (isset($prevxref)) {
			// get previous xref
			$xref = $this->getXrefData($prevxref, $xref);
		}
		return $xref;
	}

	/**
	 * Reverse the PNG predictor applied to the rows of a cross-reference stream.
	 * Every encoded row is made of one predictor tag byte followed by $columns
	 * data bytes; one byte per pixel is assumed.
	 * @param $bytes (array) Stream content as array of integers (0-255).
	 * @param $columns (int) Number of data bytes in each row.
	 * @return array of decoded rows, each one an array of $columns integers.
	 * @private
	 */
	private function decodePngPredictedRows($bytes, $columns) {
		// number of bytes in a row
		$rowlen = ($columns + 1);
		// split the rows
		$rows = array_chunk($bytes, $rowlen);
		// initialize decoded array
		$ddata = array();
		// initialize first row with zeros
		$prev_row = array_fill(0, $rowlen, 0);
		// for each row apply PNG unpredictor
		foreach ($rows as $k => $row) {
			// initialize new row
			$ddata[$k] = array();
			// get PNG predictor value
			$predictor = (10 + $row[0]);
			// for each byte on the row
			for ($i = 1; $i <= $columns; ++$i) {
				// new index
				$j = ($i - 1);
				// a short last row is padded with zeros
				$cur = (isset($row[$i]) ? $row[$i] : 0);
				$row_up = $prev_row[$j];
				if ($i == 1) {
					$row_left = 0;
					$row_upleft = 0;
				} else {
					// the left neighbour is the already DECODED byte of this row
					$row_left = $ddata[$k][($j - 1)];
					$row_upleft = $prev_row[($j - 1)];
				}
				switch ($predictor) {
					case 10: { // PNG prediction (on encoding, PNG None on all rows)
						$ddata[$k][$j] = $cur;
						break;
					}
					case 11: { // PNG prediction (on encoding, PNG Sub on all rows)
						$ddata[$k][$j] = (($cur + $row_left) & 0xff);
						break;
					}
					case 12: { // PNG prediction (on encoding, PNG Up on all rows)
						$ddata[$k][$j] = (($cur + $row_up) & 0xff);
						break;
					}
					case 13: { // PNG prediction (on encoding, PNG Average on all rows)
						// integer average, rounded down
						$ddata[$k][$j] = (($cur + (($row_left + $row_up) >> 1)) & 0xff);
						break;
					}
					case 14: { // PNG prediction (on encoding, PNG Paeth on all rows)
						// initial estimate
						$p = ($row_left + $row_up - $row_upleft);
						// distances
						$pa = abs($p - $row_left);
						$pb = abs($p - $row_up);
						$pc = abs($p - $row_upleft);
						$pmin = min($pa, $pb, $pc);
						// use the nearest neighbour (ties resolved in the order left, up, upper-left)
						if ($pmin == $pa) {
							$ddata[$k][$j] = (($cur + $row_left) & 0xff);
						} elseif ($pmin == $pb) {
							$ddata[$k][$j] = (($cur + $row_up) & 0xff);
						} else {
							$ddata[$k][$j] = (($cur + $row_upleft) & 0xff);
						}
						break;
					}
					default: { // PNG prediction (on encoding, PNG optimum)
						$this->Error('Unknown PNG predictor');
						break;
					}
				}
			}
			$prev_row = $ddata[$k];
		} // end for each row
		return $ddata;
	}

	/**
	 * Get object type, raw value and offset to next object
	 * When no object can be recognized at the given position the returned type
	 * is an empty string and the returned offset points to the unrecognized data.
	 * @param $offset (int) Object offset.
	 * @return array containing object type, raw value and offset to next object
	 * @protected
	 * @since 1.0.000 (2011-06-20)
	 */
	protected function getRawObject($offset=0) {
		$objtype = ''; // object type to be returned
		$objval = ''; // object value to be returned
		$offset = (int) $offset;
		$pdflen = strlen($this->pdfdata);
		if (($offset < 0) || ($offset >= $pdflen)) {
			// nothing to read outside the document
			return array($objtype, $objval, $offset);
		}
		// skip initial white space chars: \x00 null (NUL), \x09 horizontal tab (HT), \x0A line feed (LF), \x0C form feed (FF), \x0D carriage return (CR), \x20 space (SP)
		$offset += strspn($this->pdfdata, "\x00\x09\x0a\x0c\x0d\x20", $offset);
		if ($offset >= $pdflen) {
			// only white space was left
			return array($objtype, $objval, $offset);
		}
		// get first char
		$char = $this->pdfdata[$offset];
		// get object type
		switch ($char) {
			case '%': { // \x25 PERCENT SIGN
				// skip comment and search for next token
				$next = strcspn($this->pdfdata, "\r\n", $offset);
				if ($next > 0) {
					$offset += $next;
					return $this->getRawObject($offset);
				}
				break;
			}
			case '/': { // \x2F SOLIDUS
				// name object
				$objtype = $char;
				++$offset;
				if (preg_match('/^([^\x00\x09\x0a\x0c\x0d\x20\s\x28\x29\x3c\x3e\x5b\x5d\x7b\x7d\x2f\x25]+)/', substr($this->pdfdata, $offset, 256), $matches) == 1) {
					$objval = $matches[1]; // unescaped value
					$offset += strlen($objval);
				}
				break;
			}
			case '(': // \x28 LEFT PARENTHESIS
			case ')': { // \x29 RIGHT PARENTHESIS
				// literal string object
				$objtype = $char;
				++$offset;
				$strpos = $offset;
				if ($char == '(') {
					// scan up to the balancing right parenthesis
					$open_bracket = 1;
					while ($open_bracket > 0) {
						if (!isset($this->pdfdata[$strpos])) {
							break;
						}
						$ch = $this->pdfdata[$strpos];
						switch ($ch) {
							case '\\': { // REVERSE SOLIDUS (5Ch) (Backslash)
								// skip next character
								++$strpos;
								break;
							}
							case '(': { // LEFT PARENTHESIS (28h)
								++$open_bracket;
								break;
							}
							case ')': { // RIGHT PARENTHESIS (29h)
								--$open_bracket;
								break;
							}
						}
						++$strpos;
					}
					$objval = (string) substr($this->pdfdata, $offset, ($strpos - $offset - 1));
					$offset = $strpos;
				}
				break;
			}
			case '[': // \x5B LEFT SQUARE BRACKET
			case ']': { // \x5D RIGHT SQUARE BRACKET
				// array object
				$objtype = $char;
				++$offset;
				if ($char == '[') {
					// get array content
					$objval = array();
					do {
						$oldoffset = $offset;
						// get element
						$element = $this->getRawObject($offset);
						$offset = $element[2];
						$objval[] = $element;
						// stop also when no progress is made (truncated or unrecognized data)
					} while (($element[0] != ']') && ($offset != $oldoffset));
					// remove closing delimiter
					array_pop($objval);
				}
				break;
			}
			case '<': // \x3C LESS-THAN SIGN
			case '>': { // \x3E GREATER-THAN SIGN
				if (isset($this->pdfdata[($offset + 1)]) && ($this->pdfdata[($offset + 1)] == $char)) {
					// dictionary object
					$objtype = $char.$char;
					$offset += 2;
					if ($char == '<') {
						// get dictionary content as a flat list (key, value, key, value, ...)
						$objval = array();
						do {
							$oldoffset = $offset;
							// get element
							$element = $this->getRawObject($offset);
							$offset = $element[2];
							$objval[] = $element;
							// stop also when no progress is made (truncated or unrecognized data)
						} while (($element[0] != '>>') && ($offset != $oldoffset));
						// remove closing delimiter
						array_pop($objval);
					}
				} else {
					// hexadecimal string object
					$objtype = $char;
					++$offset;
					if (($char == '<') && (preg_match('/^([0-9A-Fa-f\x09\x0a\x0c\x0d\x20]+)>/iU', substr($this->pdfdata, $offset), $matches) == 1)) {
						// remove white space characters
						// (strtr with an empty replacement string would leave the value unchanged)
						$objval = str_replace(array("\x09", "\x0a", "\x0c", "\x0d", "\x20"), '', $matches[1]);
						$offset += strlen($matches[0]);
					} elseif (($endpos = strpos($this->pdfdata, '>', $offset)) !== FALSE) {
						$offset = $endpos + 1;
					}
				}
				break;
			}
			default: {
				if (substr($this->pdfdata, $offset, 6) === 'endobj') {
					// indirect object
					$objtype = 'endobj';
					$offset += 6;
				} elseif (substr($this->pdfdata, $offset, 4) === 'null') {
					// null object
					$objtype = 'null';
					$offset += 4;
					$objval = 'null';
				} elseif (substr($this->pdfdata, $offset, 4) === 'true') {
					// boolean true object
					$objtype = 'boolean';
					$offset += 4;
					$objval = 'true';
				} elseif (substr($this->pdfdata, $offset, 5) === 'false') {
					// boolean false object
					$objtype = 'boolean';
					$offset += 5;
					$objval = 'false';
				} elseif (substr($this->pdfdata, $offset, 6) === 'stream') {
					// start stream object
					$objtype = 'stream';
					$offset += 6;
					// the keyword must be followed by an end-of-line marker (LF or CR LF)
					if (preg_match('/^([\r]?[\n])/isU', (string) substr($this->pdfdata, $offset, 2), $matches) == 1) {
						$offset += strlen($matches[0]);
						// search the end of the stream directly in the document (no copy of the remaining data)
						if (preg_match('/(endstream)[\x09\x0a\x0c\x0d\x20]/isU', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset) == 1) {
							$objval = substr($this->pdfdata, $offset, ($matches[0][1] - $offset));
							$offset = $matches[1][1];
						}
					}
				} elseif (substr($this->pdfdata, $offset, 9) === 'endstream') {
					// end stream object
					$objtype = 'endstream';
					$offset += 9;
				} elseif (preg_match('/^([0-9]+)[\s]+([0-9]+)[\s]+R/iU', substr($this->pdfdata, $offset, 33), $matches) == 1) {
					// indirect object reference
					$objtype = 'objref';
					$offset += strlen($matches[0]);
					$objval = intval($matches[1]).'_'.intval($matches[2]);
				} elseif (preg_match('/^([0-9]+)[\s]+([0-9]+)[\s]+obj/iU', substr($this->pdfdata, $offset, 33), $matches) == 1) {
					// object start
					$objtype = 'obj';
					$objval = intval($matches[1]).'_'.intval($matches[2]);
					$offset += strlen($matches[0]);
				} elseif (($numlen = strspn($this->pdfdata, '+-.0123456789', $offset)) > 0) {
					// numeric object
					$objtype = 'numeric';
					$objval = substr($this->pdfdata, $offset, $numlen);
					$offset += $numlen;
				}
				break;
			}
		}
		return array($objtype, $objval, $offset);
	}

	/**
	 * Get content of indirect object.
	 * @param $obj_ref (string) Object number and generation number separated by underscore character.
	 * @param $offset (int) Object offset.
	 * @param $decoding (boolean) If true decode streams.
	 * @return array containing object data (list of raw elements); a 'null' element is returned when the object is not found at the given offset.
	 * @protected
	 * @since 1.0.000 (2011-05-24)
	 */
	protected function getIndirectObject($obj_ref, $offset=0, $decoding=true) {
		$obj = (is_string($obj_ref) ? explode('_', $obj_ref) : array());
		if (count($obj) != 2) {
			$this->Error('Invalid object reference: '.(is_scalar($obj_ref) ? $obj_ref : gettype($obj_ref)));
			return;
		}
		$objref = $obj[0].' '.$obj[1].' obj';
		$offset = (int) $offset;
		if (($offset < 0) || ($offset > strlen($this->pdfdata))) {
			// the offset is outside the document: the object cannot be there
			return array('null', 'null', $offset);
		}
		// ignore leading zeros
		$offset += strspn($this->pdfdata, '0', $offset);
		// strict comparison: strpos returns false when the header is not found
		if (strpos($this->pdfdata, $objref, $offset) !== $offset) {
			// an indirect reference to an undefined object shall be considered a reference to the null object
			return array('null', 'null', $offset);
		}
		// starting position of object content
		$offset += strlen($objref);
		// get array of object content
		$objdata = array();
		$i = 0; // object main index
		do {
			$oldoffset = $offset;
			// get element
			$element = $this->getRawObject($offset);
			$offset = $element[2];
			// decode stream using stream's dictionary information
			if ($decoding && ($element[0] == 'stream') && (isset($objdata[($i - 1)][0])) && ($objdata[($i - 1)][0] == '<<')) {
				$element[3] = $this->decodeStream($objdata[($i - 1)][1], $element[1]);
			}
			$objdata[$i] = $element;
			++$i;
			// stop also when no progress is made (missing 'endobj' or unrecognized data)
		} while (($element[0] != 'endobj') && ($offset != $oldoffset));
		// remove closing delimiter
		array_pop($objdata);
		// return raw object content
		return $objdata;
	}

	/**
	 * Get the content of object, resolving indirect object reference if necessary.
	 * A resolved reference is returned as the list of elements of the indirect
	 * object (as built by getIndirectObject) and is cached in the objects array;
	 * any other value is returned unchanged.
	 * @param $obj (string) Object value.
	 * @return array containing object data.
	 * @protected
	 * @since 1.0.000 (2011-06-26)
	 */
	protected function getObjectVal($obj) {
		if ($obj[0] == 'objref') {
			// reference to indirect object
			if (isset($this->objects[$obj[1]])) {
				// this object has been already parsed
				return $this->objects[$obj[1]];
			} elseif (isset($this->xref['xref'][$obj[1]]) && ($this->xref['xref'][$obj[1]] > 0)) {
				// parse new object (object offsets are stored under the 'xref' key)
				$this->objects[$obj[1]] = $this->getIndirectObject($obj[1], $this->xref['xref'][$obj[1]], false);
				return $this->objects[$obj[1]];
			}
		}
		return $obj;
	}

	/**
	 * Decode the specified stream.
	 * The stream is truncated to the declared /Length (when smaller than the
	 * extracted data) and then passed through every filter known to TCPDF_FILTERS.
	 * @param $sdic (array) Stream's dictionary array.
	 * @param $stream (string) Stream to decode.
	 * @return array containing decoded stream data and remaining filters.
	 * @protected
	 * @since 1.0.000 (2011-06-22)
	 */
	protected function decodeStream($sdic, $stream) {
		// get stream length and filters
		$slength = strlen($stream);
		if ($slength <= 0) {
			return array('', array());
		}
		$filters = array();
		foreach ($sdic as $k => $v) {
			if ($v[0] == '/') {
				if (($v[1] == 'Length') && (isset($sdic[($k + 1)])) && ($sdic[($k + 1)][0] == 'numeric')) {
					// get declared stream length
					$declength = intval($sdic[($k + 1)][1]);
					if (($declength >= 0) && ($declength < $slength)) {
						$stream = substr($stream, 0, $declength);
						$slength = $declength;
					}
				} elseif (($v[1] == 'Filter') && (isset($sdic[($k + 1)]))) {
					// resolve indirect object
					$objval = $this->getObjectVal($sdic[($k + 1)]);
					if (isset($objval[0]) && is_array($objval[0])) {
						// a resolved indirect object is a list of elements: the value is the first one
						$objval = $objval[0];
					}
					if (!isset($objval[0])) {
						continue;
					}
					if ($objval[0] == '/') {
						// single filter
						$filters[] = $objval[1];
					} elseif (($objval[0] == '[') && is_array($objval[1])) {
						// array of filters
						foreach ($objval[1] as $flt) {
							if ($flt[0] == '/') {
								$filters[] = $flt[1];
							}
						}
					}
				}
			}
		}
		// decode the stream
		$remaining_filters = array();
		foreach ($filters as $filter) {
			if (in_array($filter, TCPDF_FILTERS::getAvailableFilters())) {
				try {
					$stream = TCPDF_FILTERS::decodeFilter($filter, $stream);
				} catch (Exception $e) {
					$emsg = $e->getMessage();
					// messages starting with '~' are treated as "missing decoder" errors,
					// the others as decoding errors; each kind has its own ignore flag
					$missing = (isset($emsg[0]) && ($emsg[0] == '~'));
					if (($missing && !$this->cfg['ignore_missing_filter_decoders'])
						|| (!$missing && !$this->cfg['ignore_filter_decoding_errors'])) {
						$this->Error($emsg);
					}
				}
			} else {
				// add missing filter to array
				$remaining_filters[] = $filter;
			}
		}
		return array($stream, $remaining_filters);
	}

	/**
	 * Print an error message and die if the 'die_for_errors' configuration parameter is true, otherwise throw an exception.
	 * @param $msg (string) The error message
	 * @public
	 * @since 1.0.000 (2011-05-23)
	 */
	public function Error($msg) {
		if ($this->cfg['die_for_errors']) {
			// the message may contain text taken from the parsed document: escape it for HTML output
			$flags = (defined('ENT_SUBSTITUTE') ? (ENT_QUOTES | ENT_SUBSTITUTE) : ENT_QUOTES);
			die('<strong>TCPDF_PARSER ERROR: </strong>'.htmlspecialchars((string) $msg, $flags));
		} else {
			throw new Exception('TCPDF_PARSER ERROR: '.$msg);
		}
	}

} // END OF TCPDF_PARSER CLASS
808
|
|
|
|
809
|
|
|
//============================================================+ |
810
|
|
|
// END OF FILE |
811
|
|
|
//============================================================+ |
812
|
|
|
|
Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.
The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.
This check looks for comments that seem to be mostly valid code and reports them.