Completed
Push — master ( 66ad27...2fab78 )
by Konrad
12:00 queued 08:18
created

FilterHelper::decodeFilterASCII85Decode()   C

Complexity

Conditions 13
Paths 168

Size

Total Lines 75
Code Lines 45

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 182

Importance

Changes 1
Bugs 0 Features 1
Metric Value
cc 13
eloc 45
c 1
b 0
f 1
nc 168
nop 1
dl 0
loc 75
ccs 0
cts 41
cp 0
crap 182
rs 6.05

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method.

1
<?php
2
3
/**
4
 * This file is based on code of tecnickcom/TCPDF PDF library.
5
 *
6
 * Original author Nicola Asuni ([email protected]) and
7
 * contributors (https://github.com/tecnickcom/TCPDF/graphs/contributors).
8
 *
9
 * @see https://github.com/tecnickcom/TCPDF
10
 *
11
 * Original code was licensed on the terms of the LGPL v3.
12
 *
13
 * ------------------------------------------------------------------------------
14
 *
15
 * @file This file is part of the PdfParser library.
16
 *
17
 * @author  Konrad Abicht <[email protected]>
18
 * @date    2020-01-06
19
 *
20
 * @license LGPLv3
21
 * @url     <https://github.com/smalot/pdfparser>
22
 *
23
 *  PdfParser is a pdf library written in PHP, extraction oriented.
24
 *  Copyright (C) 2017 - Sébastien MALOT <[email protected]>
25
 *
26
 *  This program is free software: you can redistribute it and/or modify
27
 *  it under the terms of the GNU Lesser General Public License as published by
28
 *  the Free Software Foundation, either version 3 of the License, or
29
 *  (at your option) any later version.
30
 *
31
 *  This program is distributed in the hope that it will be useful,
32
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
33
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
34
 *  GNU Lesser General Public License for more details.
35
 *
36
 *  You should have received a copy of the GNU Lesser General Public License
37
 *  along with this program.
38
 *  If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
39
 */
40
41
namespace Smalot\PdfParser\RawData;
42
43
use Exception;
44
45
/**
 * Decoders for the standard PDF stream filters.
 *
 * The public entry point is decodeFilter(); each supported filter has its own
 * protected decodeFilter<Name>() method so subclasses can override a single decoder.
 */
class FilterHelper
{
    /**
     * Names of the filters for which a decoder is implemented in this class.
     *
     * @var string[]
     */
    protected $availableFilters = ['ASCIIHexDecode', 'ASCII85Decode', 'LZWDecode', 'FlateDecode', 'RunLengthDecode'];

    /**
     * Decode data using the specified filter type.
     *
     * Unknown filter names are not treated as an error: the data is returned untouched.
     *
     * @param string $filter Filter name
     * @param string $data   Data to decode
     *
     * @return string Decoded data string
     *
     * @throws Exception if the filter is known but not implemented yet, or if the data is invalid
     */
    public function decodeFilter($filter, $data)
    {
        switch ($filter) {
            case 'ASCIIHexDecode':
                return $this->decodeFilterASCIIHexDecode($data);

            case 'ASCII85Decode':
                return $this->decodeFilterASCII85Decode($data);

            case 'LZWDecode':
                return $this->decodeFilterLZWDecode($data);

            case 'FlateDecode':
                return $this->decodeFilterFlateDecode($data);

            case 'RunLengthDecode':
                return $this->decodeFilterRunLengthDecode($data);

            case 'CCITTFaxDecode':
                throw new Exception('Decode CCITTFaxDecode not implemented yet.');
            case 'JBIG2Decode':
                throw new Exception('Decode JBIG2Decode not implemented yet.');
            case 'DCTDecode':
                throw new Exception('Decode DCTDecode not implemented yet.');
            case 'JPXDecode':
                throw new Exception('Decode JPXDecode not implemented yet.');
            case 'Crypt':
                throw new Exception('Decode Crypt not implemented yet.');
            default:
                return $data;
        }
    }

    /**
     * ASCIIHexDecode
     *
     * Decodes data encoded in an ASCII hexadecimal representation, reproducing the original binary data.
     *
     * @param string $data Data to decode
     *
     * @return string data string
     *
     * @throws Exception if the data contains non-hexadecimal characters, or has an odd number
     *                   of digits without being terminated by the EOD marker
     */
    protected function decodeFilterASCIIHexDecode($data)
    {
        // all white-space characters shall be ignored
        $data = preg_replace('/[\s]/', '', $data);

        // EOD marker is GREATER-THAN SIGN (3Eh); it and everything behind it is dropped
        $eodPosition = strpos($data, '>');
        $hasEod = false !== $eodPosition;
        if ($hasEod) {
            $data = substr($data, 0, $eodPosition);
        }

        if (0 !== \strlen($data) % 2) {
            // odd number of hexadecimal digits is only legal right before the EOD marker
            if (!$hasEod) {
                throw new Exception('decodeFilterASCIIHexDecode: invalid code');
            }
            // the filter shall behave as if a 0 (zero) FOLLOWED the last digit
            $data .= '0';
        }

        // check for invalid characters
        if (preg_match('/[^a-fA-F\d]/', $data) > 0) {
            throw new Exception('decodeFilterASCIIHexDecode: invalid code');
        }

        // one byte of binary data for each pair of ASCII hexadecimal digits
        return pack('H*', $data);
    }

    /**
     * ASCII85Decode
     *
     * Decodes data encoded in an ASCII base-85 representation, reproducing the original binary data.
     *
     * @param string $data Data to decode
     *
     * @return string data string
     *
     * @throws Exception if the data contains characters outside the base-85 alphabet, a "z"
     *                   inside a group, or a final group consisting of a single character
     */
    protected function decodeFilterASCII85Decode($data)
    {
        // initialize string to return
        $decoded = '';
        // all white-space characters shall be ignored
        $data = preg_replace('/[\s]/', '', $data);

        // Remove the optional start sequence <~ (3Ch)(7Eh). It only counts as a prefix when it
        // is at the very beginning: "<~" also shows up when the last data character is "<"
        // and is directly followed by the EOD marker "~>".
        if (0 === strpos($data, '<~')) {
            $data = substr($data, 2);
        }

        // check for EOD: 2-character sequence ~> (7Eh)(3Eh); remove it and extra data (if any)
        $eod = strpos($data, '~>');
        if (false !== $eod) {
            $data = substr($data, 0, $eod);
        }

        $data_length = \strlen($data);

        // valid characters are "!" (21h) to "u" (75h) plus the shortcut "z" (7Ah)
        if (preg_match('/[^\x21-\x75\x7A]/', $data) > 0) {
            throw new Exception('decodeFilterASCII85Decode: invalid code');
        }

        // "z" stands for a group of four zero bytes
        $zseq = \chr(0).\chr(0).\chr(0).\chr(0);
        // number of characters of the current 5-character group already consumed (0-4)
        $group_pos = 0;
        // numeric value of the current group
        $tuple = 0;
        $pow85 = [(85 * 85 * 85 * 85), (85 * 85 * 85), (85 * 85), 85, 1];

        for ($i = 0; $i < $data_length; ++$i) {
            $char = \ord($data[$i]);

            if (122 === $char) { // 'z'
                // the shortcut is only allowed between groups
                if (0 !== $group_pos) {
                    throw new Exception('decodeFilterASCII85Decode: invalid code');
                }
                $decoded .= $zseq;
                continue;
            }

            $tuple += (($char - 33) * $pow85[$group_pos]);
            if (4 === $group_pos) {
                // group complete: emit the low 32 bits as 4 big-endian bytes
                $decoded .= pack('N', $tuple);
                $tuple = 0;
                $group_pos = 0;
            } else {
                ++$group_pos;
            }
        }

        // a single left-over character cannot encode any byte
        if (1 === $group_pos) {
            throw new Exception('decodeFilterASCII85Decode: invalid code');
        }

        // last, incomplete group of n characters (2-4) encodes n - 1 bytes
        if ($group_pos > 1) {
            // compensate for the truncated low-order digits so the kept bytes round correctly
            $tuple += $pow85[($group_pos - 1)];
            $decoded .= substr(pack('N', $tuple), 0, $group_pos - 1);
        }

        return $decoded;
    }

    /**
     * FlateDecode
     *
     * Decompresses data encoded using the zlib/deflate compression method, reproducing the original text or binary data.
     *
     * @param string $data Data to decode
     *
     * @return string data string
     *
     * @throws Exception if gzuncompress() raises a warning or returns false
     */
    protected function decodeFilterFlateDecode($data)
    {
        /*
         * gzuncompress() reports problems (e.g. empty $data) with an E_WARNING, which cannot be
         * caught. The temporary handler below turns such a warning into an Exception.
         */
        set_error_handler(function ($errNo, $errStr) {
            if (E_WARNING === $errNo) {
                throw new Exception($errStr);
            }

            // anything else: fall back to the default PHP error handler
            return false;
        });

        try {
            $decoded = gzuncompress($data);
            if (false === $decoded) {
                throw new Exception('decodeFilterFlateDecode: invalid code');
            }
        } finally {
            // Restore old handler just in case it was customized outside of PDFParser.
            restore_error_handler();
        }

        return $decoded;
    }

    /**
     * LZWDecode
     *
     * Decompresses data encoded using the LZW (Lempel-Ziv-Welch) adaptive compression method, reproducing the original text or binary data.
     *
     * @param string $data Data to decode
     *
     * @return string Data string
     */
    protected function decodeFilterLZWDecode($data)
    {
        // initialize string to return
        $decoded = '';

        // convert the input to a string of '0' / '1' characters, 8 per input byte
        $bitstring = '';
        $byte_count = \strlen($data);
        for ($i = 0; $i < $byte_count; ++$i) {
            $bitstring .= sprintf('%08b', \ord($data[$i]));
        }
        $bit_count = \strlen($bitstring);
        // read position inside $bitstring; advancing it avoids re-copying the string per code
        $offset = 0;

        // current code length in bits
        $bitlen = 9;
        // next free dictionary index (256 = clear-table, 257 = EOD)
        $dix = 258;
        // dictionary starts with one entry per byte value
        $dictionary = [];
        for ($i = 0; $i < 256; ++$i) {
            $dictionary[$i] = \chr($i);
        }
        // 256 means "no previous code yet", so the first code never creates a dictionary
        // entry, even if the encoder omitted the leading clear-table marker
        $prev_index = 256;

        // only complete codes are read; left-over bits are byte padding
        while (($offset + $bitlen) <= $bit_count) {
            $index = bindec(substr($bitstring, $offset, $bitlen));
            if (257 == $index) { // EOD marker
                break;
            }
            $offset += $bitlen;

            if (256 == $index) { // clear-table marker
                $bitlen = 9;
                $dix = 258;
                $prev_index = 256;
                $dictionary = [];
                for ($i = 0; $i < 256; ++$i) {
                    $dictionary[$i] = \chr($i);
                }
                continue;
            }

            if (256 == $prev_index) {
                // first code after a (re)start: plain output, nothing to add to the dictionary
                $decoded .= $dictionary[$index];
                $prev_index = $index;
                continue;
            }

            if ($index < $dix) {
                // code is already in the dictionary
                $entry = $dictionary[$index];
                $current_index = $index;
            } else {
                // code is the entry about to be created: previous sequence + its own first byte
                $entry = $dictionary[$prev_index].$dictionary[$prev_index][0];
                $current_index = $dix;
            }
            $decoded .= $entry;

            // new entry = previous sequence + first byte of the current one
            $dictionary[$dix] = $dictionary[$prev_index].$entry[0];
            ++$dix;
            // the current code always becomes the previous one, in both branches above
            $prev_index = $current_index;

            // change bit length by case
            if (2047 == $dix) {
                $bitlen = 12;
            } elseif (1023 == $dix) {
                $bitlen = 11;
            } elseif (511 == $dix) {
                $bitlen = 10;
            }
        }

        return $decoded;
    }

    /**
     * RunLengthDecode
     *
     * Decompresses data encoded using a byte-oriented run-length encoding algorithm.
     *
     * @param string $data Data to decode
     *
     * @return string
     */
    protected function decodeFilterRunLengthDecode($data)
    {
        // initialize string to return
        $decoded = '';
        $data_length = \strlen($data);
        $i = 0;

        while ($i < $data_length) {
            // length byte of the current run
            $byte = \ord($data[$i]);

            if (128 === $byte) {
                // a length value of 128 denotes EOD
                break;
            }

            if ($byte < 128) {
                // length 0 to 127: the following length + 1 (1 to 128) bytes are copied literally
                $decoded .= substr($data, ($i + 1), ($byte + 1));
                $i += ($byte + 2);
                continue;
            }

            // length 129 to 255: the following single byte is repeated 257 - length (2 to 128) times
            if (($i + 1) >= $data_length) {
                // truncated input: the byte to repeat is missing, nothing more to decode
                break;
            }
            $decoded .= str_repeat($data[($i + 1)], (257 - $byte));
            $i += 2;
        }

        return $decoded;
    }

    /**
     * @return array list of available filters
     */
    public function getAvailableFilters()
    {
        return $this->availableFilters;
    }
}
391