Test Failed
Push — pr/257 ( 57d61f )
by Konrad
05:10 queued 13s
created

FilterHelper::decodeFilter()   B

Complexity

Conditions 11
Paths 11

Size

Total Lines 30
Code Lines 23

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 1
Metric Value
cc 11
eloc 23
c 1
b 0
f 1
nc 11
nop 2
dl 0
loc 30
rs 7.3166

How to fix   Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method.

1
<?php
2
3
/**
4
 * This file is based on code of tecnickcom/TCPDF PDF library.
5
 *
6
 * Original author Nicola Asuni ([email protected]) and
7
 * contributors (https://github.com/tecnickcom/TCPDF/graphs/contributors).
8
 *
9
 * @see https://github.com/tecnickcom/TCPDF
10
 *
11
 * Original code was licensed on the terms of the LGPL v3.
12
 *
13
 * ------------------------------------------------------------------------------
14
 *
15
 * @file This file is part of the PdfParser library.
16
 *
17
 * @author  Konrad Abicht <[email protected]>
18
 * @date    2020-01-06
19
 *
20
 * @license LGPLv3
21
 * @url     <https://github.com/smalot/pdfparser>
22
 *
23
 *  PdfParser is a pdf library written in PHP, extraction oriented.
24
 *  Copyright (C) 2017 - Sébastien MALOT <[email protected]>
25
 *
26
 *  This program is free software: you can redistribute it and/or modify
27
 *  it under the terms of the GNU Lesser General Public License as published by
28
 *  the Free Software Foundation, either version 3 of the License, or
29
 *  (at your option) any later version.
30
 *
31
 *  This program is distributed in the hope that it will be useful,
32
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
33
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
34
 *  GNU Lesser General Public License for more details.
35
 *
36
 *  You should have received a copy of the GNU Lesser General Public License
37
 *  along with this program.
38
 *  If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
39
 */
40
41
namespace Smalot\PdfParser\RawData;
42
43
use Exception;
44
45
/**
 * Decoders for the standard PDF stream filters.
 *
 * decodeFilter() is the public entry point; it dispatches on the filter name
 * to one of the protected decodeFilter*() implementations below.
 */
class FilterHelper
{
    /**
     * @var string[] names of the filters for which a decoder is implemented in this class
     */
    protected $availableFilters = ['ASCIIHexDecode', 'ASCII85Decode', 'LZWDecode', 'FlateDecode', 'RunLengthDecode'];

    /**
     * Decode data using the specified filter type.
     *
     * Filter names that are neither implemented nor explicitly listed as
     * "not implemented yet" are passed through: $data is returned unchanged.
     *
     * @param string $filter Filter name
     * @param string $data   Data to decode
     *
     * @return string Decoded data string
     *
     * @throws Exception if a certain decode function is not implemented yet
     *                   or the data is invalid for the selected filter
     */
    public function decodeFilter($filter, $data)
    {
        switch ($filter) {
            case 'ASCIIHexDecode':
                return $this->decodeFilterASCIIHexDecode($data);

            case 'ASCII85Decode':
                return $this->decodeFilterASCII85Decode($data);

            case 'LZWDecode':
                return $this->decodeFilterLZWDecode($data);

            case 'FlateDecode':
                return $this->decodeFilterFlateDecode($data);

            case 'RunLengthDecode':
                return $this->decodeFilterRunLengthDecode($data);

            // known filters without a decoder share one error message
            case 'CCITTFaxDecode':
            case 'JBIG2Decode':
            case 'DCTDecode':
            case 'JPXDecode':
            case 'Crypt':
                throw new Exception('Decode '.$filter.' not implemented yet.');

            default:
                return $data;
        }
    }

    /**
     * ASCIIHexDecode
     *
     * Decodes data encoded in an ASCII hexadecimal representation, reproducing the original binary data.
     *
     * @param string $data Data to decode
     *
     * @return string data string
     *
     * @throws Exception if the data contains non-hexadecimal characters, or has an
     *                   odd number of digits without being terminated by the EOD marker
     */
    protected function decodeFilterASCIIHexDecode($data)
    {
        // all white-space characters shall be ignored
        $data = preg_replace('/[\s]/', '', $data);

        // check for EOD character: GREATER-THAN SIGN (3Eh)
        $eodPos = strpos($data, '>');
        $hasEod = false !== $eodPos;
        if ($hasEod) {
            // remove EOD and extra data (if any)
            $data = substr($data, 0, $eodPos);
        }

        if (1 === \strlen($data) % 2) {
            // odd number of hexadecimal digits is only legal right before EOD
            if (!$hasEod) {
                throw new Exception('decodeFilterASCIIHexDecode: invalid code');
            }
            // EOD shall behave as if a 0 (zero) followed the last digit,
            // i.e. "7>" decodes to the byte 0x70 (not 0x07)
            $data .= '0';
        }

        // check for invalid characters
        if (preg_match('/[^a-fA-F\d]/', $data) > 0) {
            throw new Exception('decodeFilterASCIIHexDecode: invalid code');
        }

        // get one byte of binary data for each pair of ASCII hexadecimal digits
        return pack('H*', $data);
    }

    /**
     * ASCII85Decode
     *
     * Decodes data encoded in an ASCII base-85 representation, reproducing the original binary data.
     *
     * @param string $data Data to decode
     *
     * @return string data string
     *
     * @throws Exception if the data contains characters outside the base-85 alphabet,
     *                   a "z" inside a group, or a final group of a single character
     */
    protected function decodeFilterASCII85Decode($data)
    {
        // all white-space characters shall be ignored
        $data = preg_replace('/[\s]/', '', $data);

        // remove the optional 2-character start sequence <~ (3Ch)(7Eh);
        // only a leading occurrence counts, anything later is not a prefix
        if (0 === strpos($data, '<~')) {
            $data = (string) substr($data, 2);
        }

        // check for EOD: 2-character sequence ~> (7Eh)(3Eh)
        $eodPos = strpos($data, '~>');
        if (false !== $eodPos) {
            // remove EOD and extra data (if any)
            $data = substr($data, 0, $eodPos);
        }

        // valid characters are "!" (21h) to "u" (75h) plus the "z" (7Ah) shortcut
        if (preg_match('/[^\x21-\x75\x7A]/', $data) > 0) {
            throw new Exception('decodeFilterASCII85Decode: invalid code');
        }

        // weight of each of the 5 characters of a group
        $pow85 = [(85 * 85 * 85 * 85), (85 * 85 * 85), (85 * 85), 85, 1];

        $decoded = '';
        // value accumulated for the current group
        $tuple = 0;
        // position inside the current group of 5 characters (0-4)
        $groupPos = 0;
        $dataLength = \strlen($data);

        for ($i = 0; $i < $dataLength; ++$i) {
            $char = \ord($data[$i]);

            if (122 === $char) { // 'z' stands for four zero bytes
                if (0 !== $groupPos) {
                    // 'z' is only allowed between groups
                    throw new Exception('decodeFilterASCII85Decode: invalid code');
                }
                $decoded .= "\0\0\0\0";
                continue;
            }

            // the value represented by a group of 5 characters should never be greater than 2^32 - 1
            $tuple += ($char - 33) * $pow85[$groupPos];
            if (4 === $groupPos) {
                // group complete: emit its 4 bytes, most significant first
                $decoded .= pack('N', $tuple);
                $tuple = 0;
                $groupPos = 0;
            } else {
                ++$groupPos;
            }
        }

        // a final group of a single character cannot encode any byte
        if (1 === $groupPos) {
            throw new Exception('decodeFilterASCII85Decode: invalid code');
        }

        // last, incomplete group of n characters (2-4) yields n - 1 bytes
        if ($groupPos > 1) {
            // compensate for the truncated low-order characters before cutting bytes
            $tuple += $pow85[$groupPos - 1];
            $decoded .= substr(pack('N', $tuple), 0, $groupPos - 1);
        }

        return $decoded;
    }

    /**
     * FlateDecode
     *
     * Decompresses data encoded using the zlib/deflate compression method, reproducing the original text or binary data.
     *
     * @param string $data Data to decode
     *
     * @return string data string
     *
     * @throws Exception if gzuncompress() raises a warning or returns false
     */
    protected function decodeFilterFlateDecode($data)
    {
        /*
         * gzuncompress may raise an E_WARNING in case of an error (like $data is empty).
         * The temporary handler below turns such a warning into an Exception, which is catchable.
         * It is registered for E_WARNING only, so every other error type keeps PHP's standard handling.
         */
        set_error_handler(static function ($errNo, $errStr) {
            throw new Exception($errStr);
        }, E_WARNING);

        try {
            $decoded = gzuncompress($data);
            if (false === $decoded) {
                throw new Exception('decodeFilterFlateDecode: invalid code');
            }
        } finally {
            // Restore old handler just in case it was customized outside of PDFParser.
            restore_error_handler();
        }

        return $decoded;
    }

    /**
     * LZWDecode
     *
     * Decompresses data encoded using the LZW (Lempel-Ziv-Welch) adaptive compression method, reproducing the original text or binary data.
     *
     * @param string $data Data to decode
     *
     * @return string Data string
     */
    protected function decodeFilterLZWDecode($data)
    {
        // convert string to a string of '0'/'1' characters, 8 per input byte
        $bitstring = '';
        $byteCount = \strlen($data);
        for ($i = 0; $i < $byteCount; ++$i) {
            $bitstring .= sprintf('%08b', \ord($data[$i]));
        }
        $bitCount = \strlen($bitstring);

        // the first 256 dictionary entries are the single byte values
        $initialDictionary = array_map('chr', range(0, 255));
        $dictionary = $initialDictionary;

        // code length in bits
        $bitlen = 9;
        // next free dictionary index (256 = clear-table, 257 = EOD)
        $dix = 258;
        // 256 means "no previous code": the next code starts a fresh sequence,
        // which is also the correct state when the stream has no leading clear-table code
        $prev_index = 256;
        // read position inside $bitstring
        $offset = 0;
        $decoded = '';

        // read whole codes only; trailing padding bits shorter than a code are ignored
        while ($offset + $bitlen <= $bitCount) {
            $index = (int) bindec(substr($bitstring, $offset, $bitlen));
            $offset += $bitlen;

            if (257 === $index) { // EOD marker
                break;
            }

            if (256 === $index) { // clear-table marker
                $bitlen = 9;
                $dix = 258;
                $prev_index = 256;
                $dictionary = $initialDictionary;
                continue;
            }

            if (256 === $prev_index) {
                // first code after a clear: output only, no new dictionary entry
                $decoded .= $dictionary[$index];
                $prev_index = $index;
                continue;
            }

            if ($index < $dix) {
                // code exists in the dictionary
                $decoded .= $dictionary[$index];
                $dic_val = $dictionary[$prev_index].$dictionary[$index][0];
                $prev_index = $index;
            } else {
                // code not in the dictionary yet: it denotes the entry being created now
                $dic_val = $dictionary[$prev_index].$dictionary[$prev_index][0];
                $decoded .= $dic_val;
                // the entry stored at $dix below is the new previous sequence
                $prev_index = $dix;
            }

            // update dictionary
            $dictionary[$dix] = $dic_val;
            ++$dix;

            // code length grows one entry before the current length is exhausted
            if (2047 === $dix) {
                $bitlen = 12;
            } elseif (1023 === $dix) {
                $bitlen = 11;
            } elseif (511 === $dix) {
                $bitlen = 10;
            }
        }

        return $decoded;
    }

    /**
     * RunLengthDecode
     *
     * Decompresses data encoded using a byte-oriented run-length encoding algorithm.
     *
     * @param string $data Data to decode
     *
     * @return string
     */
    protected function decodeFilterRunLengthDecode($data)
    {
        $decoded = '';
        $dataLength = \strlen($data);
        $i = 0;

        while ($i < $dataLength) {
            // get current length byte
            $byte = \ord($data[$i]);

            if (128 === $byte) {
                // a length value of 128 denote EOD
                break;
            }

            if ($byte < 128) {
                // if the length byte is in the range 0 to 127
                // the following length + 1 (1 to 128) bytes shall be copied literally during decompression
                $decoded .= substr($data, ($i + 1), ($byte + 1));
                // move to next block
                $i += ($byte + 2);
                continue;
            }

            // truncated stream: the byte to repeat is missing, nothing more to decode
            if ($i + 1 >= $dataLength) {
                break;
            }

            // if length is in the range 129 to 255,
            // the following single byte shall be copied 257 - length (2 to 128) times during decompression
            $decoded .= str_repeat($data[($i + 1)], (257 - $byte));
            // move to next block
            $i += 2;
        }

        return $decoded;
    }

    /**
     * @return array list of available filters
     */
    public function getAvailableFilters()
    {
        return $this->availableFilters;
    }
}
390