FilterHelper - Code Metrics - smalot/pdfparser - Measure and Improve Code Quality continuously with Scrutinizer

FilterHelper C
last analyzed 2026-01-08 08:18 UTC

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	379
Duplicated Lines	0 %

Test Coverage

Coverage

51.01%

Importance

Changes	2
Bugs	0	Features	1

Metric	Value
eloc	171
c	2
b	0
f	1
dl	0
loc	379
ccs	76
cts	149
cp	0.5101
rs	5.5199
wmc	56

7 Methods

Rating	Name	Size	Complexity
B	decodeFilter()	30	11
A	decodeFilterASCIIHexDecode()	30	5
C	decodeFilterLZWDecode()	73	12
F	decodeFilterASCII85Decode()	102	17
A	decodeFilterRunLengthDecode()	29	4
A	decodeFilterFlateDecode()	31	6
A	getAvailableFilters()	3	1

How to fix Complexity

<?php

/**
 * This file is based on code of tecnickcom/TCPDF PDF library.
 *
 * Original author Nicola Asuni ([email protected]) and
 * contributors (https://github.com/tecnickcom/TCPDF/graphs/contributors).
 *
 * @see https://github.com/tecnickcom/TCPDF
 *
 * Original code was licensed on the terms of the LGPL v3.
 *
 * ------------------------------------------------------------------------------
 *
 * @file This file is part of the PdfParser library.
 *
 * @author  Konrad Abicht <[email protected]>
 *
 * @date    2020-01-06
 *
 * @license LGPLv3
 *
 * @url     <https://github.com/smalot/pdfparser>
 *
 *  PdfParser is a pdf library written in PHP, extraction oriented.
 *  Copyright (C) 2017 - Sébastien MALOT <[email protected]>
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Lesser General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program.
 *  If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
 */

namespace Smalot\PdfParser\RawData;

use Smalot\PdfParser\Exception\NotImplementedException;

class FilterHelper
{
    /**
     * Names of the stream filters this helper is able to decode.
     *
     * @var string[]
     */
    protected $availableFilters = ['ASCIIHexDecode', 'ASCII85Decode', 'LZWDecode', 'FlateDecode', 'RunLengthDecode'];

    /**
     * Decode data using the specified filter type.
     *
     * Filters that are known but not supported raise a NotImplementedException;
     * any filter name that is not recognised at all returns $data untouched.
     *
     * @param string $filter            Filter name
     * @param string $data              Data to decode
     * @param int    $decodeMemoryLimit Upper bound (in bytes) handed to the FlateDecode decoder; 0 means "no limit"
     *
     * @return string Decoded data string
     *
     * @throws \Exception                                          if the data is invalid for the chosen filter
     * @throws \Smalot\PdfParser\Exception\NotImplementedException if a certain decode function is not implemented yet
     */
    public function decodeFilter(string $filter, string $data, int $decodeMemoryLimit = 0): string
    {
        switch ($filter) {
            case 'ASCIIHexDecode':
                return $this->decodeFilterASCIIHexDecode($data);

            case 'ASCII85Decode':
                return $this->decodeFilterASCII85Decode($data);

            case 'LZWDecode':
                return $this->decodeFilterLZWDecode($data);

            case 'FlateDecode':
                return $this->decodeFilterFlateDecode($data, $decodeMemoryLimit);

            case 'RunLengthDecode':
                return $this->decodeFilterRunLengthDecode($data);

            case 'CCITTFaxDecode':
                throw new NotImplementedException('Decode CCITTFaxDecode not implemented yet.');
            case 'JBIG2Decode':
                throw new NotImplementedException('Decode JBIG2Decode not implemented yet.');
            case 'DCTDecode':
                throw new NotImplementedException('Decode DCTDecode not implemented yet.');
            case 'JPXDecode':
                throw new NotImplementedException('Decode JPXDecode not implemented yet.');
            case 'Crypt':
                throw new NotImplementedException('Decode Crypt not implemented yet.');
            default:
                // unknown filter names are passed through unchanged
                return $data;
        }
    }

    /**
     * ASCIIHexDecode
     *
     * Decodes data encoded in an ASCII hexadecimal representation, reproducing the original binary data.
     *
     * @param string $data Data to decode
     *
     * @return string data string
     *
     * @throws \Exception if the data contains non-hexadecimal characters, or has an odd
     *                    number of digits without being terminated by the EOD marker
     */
    protected function decodeFilterASCIIHexDecode(string $data): string
    {
        // all white-space characters shall be ignored
        $data = preg_replace('/[\s]/', '', $data);

        // check for EOD character: GREATER-THAN SIGN (3Eh)
        $eodPosition = strpos($data, '>');
        $hasEod = false !== $eodPosition;
        if ($hasEod) {
            // remove EOD and extra data (if any)
            $data = (string) substr($data, 0, $eodPosition);
        }

        if (0 !== (\strlen($data) % 2)) {
            // odd number of hexadecimal digits is only legal directly before EOD
            if (!$hasEod) {
                throw new \Exception('decodeFilterASCIIHexDecode: invalid code');
            }
            // EOD shall behave as if a 0 (zero) followed the last digit,
            // i.e. "ABC>" is decoded like "ABC0" (and not like "AB0C")
            $data .= '0';
        }

        // check for invalid characters
        if (preg_match('/[^a-fA-F\d]/', $data) > 0) {
            throw new \Exception('decodeFilterASCIIHexDecode: invalid code');
        }

        // get one byte of binary data for each pair of ASCII hexadecimal digits
        return pack('H*', $data);
    }

    /**
     * ASCII85Decode
     *
     * Decodes data encoded in an ASCII base-85 representation, reproducing the original binary data.
     *
     * @param string $data Data to decode
     *
     * @return string data string
     *
     * @throws \Exception if the data contains characters outside the base-85 alphabet,
     *                    a "z" inside a group, or a dangling single-character final group
     */
    protected function decodeFilterASCII85Decode(string $data): string
    {
        // initialize string to return
        $decoded = '';
        // all white-space characters shall be ignored
        $data = preg_replace('/[\s]/', '', $data);
        // remove the optional 2-character start sequence <~ (3Ch)(7Eh)
        if (0 === strpos($data, '<~')) {
            $data = (string) substr($data, 2);
        }
        // check for EOD: 2-character sequence ~> (7Eh)(3Eh)
        $eod = strpos($data, '~>');
        if (false !== $eod) {
            // remove EOD and extra data (if any); "~" is never part of the payload alphabet
            $data = (string) substr($data, 0, $eod);
        }
        // data length
        $data_length = \strlen($data);
        // check for invalid characters: only "!" (21h) to "u" (75h) and the shortcut "z" are allowed
        if (preg_match('/[^\x21-\x75z]/', $data) > 0) {
            throw new \Exception('decodeFilterASCII85Decode: invalid code');
        }
        // "z" is a shortcut for a group of four zero bytes
        $zseq = "\0\0\0\0";
        // position inside a group of 5 characters (0-4)
        $group_pos = 0;
        $tuple = 0;
        $pow85 = [85 * 85 * 85 * 85, 85 * 85 * 85, 85 * 85, 85, 1];

        // for each character
        for ($i = 0; $i < $data_length; ++$i) {
            $char = \ord($data[$i]);

            if (122 === $char) { // 'z'
                // the shortcut is only valid between groups
                if (0 !== $group_pos) {
                    throw new \Exception('decodeFilterASCII85Decode: invalid code');
                }
                $decoded .= $zseq;
                continue;
            }

            // the value represented by a group of 5 characters should never be greater than 2^32 - 1
            $tuple += (($char - 33) * $pow85[$group_pos]);
            if (4 === $group_pos) {
                // pack('N') emits the low 32 bits as 4 big-endian bytes, which keeps every
                // byte inside [0, 255] without handing out-of-range values to chr()
                $decoded .= pack('N', $tuple & 0xFFFFFFFF);
                $tuple = 0;
                $group_pos = 0;
            } else {
                ++$group_pos;
            }
        }

        // last (partial) tuple, if any
        if (1 === $group_pos) {
            // a single trailing character cannot encode a full byte
            throw new \Exception('decodeFilterASCII85Decode: invalid code');
        }
        if ($group_pos > 1) {
            // round up for the characters missing from the group, then keep
            // only the (characters - 1) leading bytes that were really encoded
            $tuple += $pow85[$group_pos - 1];
            $decoded .= substr(pack('N', $tuple & 0xFFFFFFFF), 0, $group_pos - 1);
        }

        return $decoded;
    }

    /**
     * FlateDecode
     *
     * Decompresses data encoded using the zlib/deflate compression method, reproducing the original text or binary data.
     *
     * Although the signature allows null, this method never returns null:
     * it either returns a non-empty string or throws.
     *
     * @param string $data              Data to decode
     * @param int    $decodeMemoryLimit Memory limit on deflation (0 = unlimited)
     *
     * @return string data string
     *
     * @throws \Exception if neither decompression strategy yields a non-empty string
     */
    protected function decodeFilterFlateDecode(string $data, int $decodeMemoryLimit): ?string
    {
        // Uncatchable E_WARNING for "data error" is @ suppressed
        // so execution may proceed with an alternate decompression
        // method.
        $decoded = @gzuncompress($data, $decodeMemoryLimit);

        if (false === $decoded) {
            // If gzuncompress() failed, try again using the compress.zlib://
            // wrapper to decode it in a file-based context.
            // See: https://www.php.net/manual/en/function.gzuncompress.php#79042
            // Issue: https://github.com/smalot/pdfparser/issues/592
            $ztmp = tmpfile();
            if (false !== $ztmp) {
                try {
                    // prepend a minimal gzip header so the stream wrapper accepts the payload
                    fwrite($ztmp, "\x1f\x8b\x08\x00\x00\x00\x00\x00".$data);
                    $file = stream_get_meta_data($ztmp)['uri'];
                    if (0 === $decodeMemoryLimit) {
                        $decoded = file_get_contents('compress.zlib://'.$file);
                    } else {
                        $decoded = file_get_contents('compress.zlib://'.$file, false, null, 0, $decodeMemoryLimit);
                    }
                } finally {
                    // closing the handle also removes the temporary file
                    fclose($ztmp);
                }
            }
        }

        if (false === \is_string($decoded) || '' === $decoded) {
            // If the decoded string is empty, that means decoding failed.
            throw new \Exception('decodeFilterFlateDecode: invalid data');
        }

        return $decoded;
    }

    /**
     * LZWDecode
     *
     * Decompresses data encoded using the LZW (Lempel-Ziv-Welch) adaptive compression method, reproducing the original text or binary data.
     *
     * Codes 0-255 are literal bytes, 256 is the clear-table marker and 257 the
     * EOD marker; new dictionary entries start at 258. Code width starts at
     * 9 bits and grows up to 12 bits as the dictionary fills.
     *
     * @param string $data Data to decode
     *
     * @return string Data string
     */
    protected function decodeFilterLZWDecode(string $data): string
    {
        // initialize string to return
        $decoded = '';
        // convert the input to a string of '0'/'1' characters
        $bitstring = '';
        $byteCount = \strlen($data);
        for ($i = 0; $i < $byteCount; ++$i) {
            $bitstring .= \sprintf('%08b', \ord($data[$i]));
        }
        $totalBits = \strlen($bitstring);
        // read position inside $bitstring (avoids re-copying the string for every code)
        $offset = 0;
        // initialize code length in bits
        $bitlen = 9;
        // initialize dictionary index (next free slot)
        $dix = 258;
        // initialize the dictionary (with the first 256 entries).
        $dictionary = array_map('chr', range(0, 255));
        // Start as if a clear-table marker had just been read, so that streams
        // which omit the leading marker do not get a bogus first dictionary entry.
        $prev_index = 256;

        // read complete codes only; leftover padding bits are not a code
        while ($offset + $bitlen <= $totalBits) {
            $index = (int) bindec(substr($bitstring, $offset, $bitlen));
            $offset += $bitlen;

            if (257 === $index) {
                // EOD marker
                break;
            }

            if (256 === $index) { // clear-table marker
                // reset code length, dictionary index and dictionary
                $bitlen = 9;
                $dix = 258;
                $prev_index = 256;
                $dictionary = array_map('chr', range(0, 255));
                continue;
            }

            if (256 === $prev_index) {
                // first code after a (re)start: emit it, nothing to add to the dictionary yet
                $decoded .= $dictionary[$index];
                $prev_index = $index;
                continue;
            }

            if ($index < $dix) {
                // code is already in the dictionary
                $decoded .= $dictionary[$index];
                $dic_val = $dictionary[$prev_index].$dictionary[$index][0];
                $prev_index = $index;
            } else {
                // code is the entry about to be created (previous string + its own first byte)
                $dic_val = $dictionary[$prev_index].$dictionary[$prev_index][0];
                $decoded .= $dic_val;
                // the entry is stored at $dix below, and it becomes the new prefix
                $prev_index = $dix;
            }

            // update dictionary
            $dictionary[$dix] = $dic_val;
            ++$dix;

            // widen the code length when the dictionary reaches the next threshold
            if (2047 === $dix) {
                $bitlen = 12;
            } elseif (1023 === $dix) {
                $bitlen = 11;
            } elseif (511 === $dix) {
                $bitlen = 10;
            }
        }

        return $decoded;
    }

    /**
     * RunLengthDecode
     *
     * Decompresses data encoded using a byte-oriented run-length encoding algorithm.
     *
     * @param string $data Data to decode
     *
     * @return string Data string
     */
    protected function decodeFilterRunLengthDecode(string $data): string
    {
        // initialize string to return
        $decoded = '';
        // data length
        $data_length = \strlen($data);
        $i = 0;
        while ($i < $data_length) {
            // get current length byte
            $byte = \ord($data[$i]);
            if (128 === $byte) {
                // a length value of 128 denotes EOD
                break;
            }

            if ($byte < 128) {
                // if the length byte is in the range 0 to 127
                // the following length + 1 (1 to 128) bytes shall be copied literally during decompression
                $decoded .= substr($data, $i + 1, $byte + 1);
                // move to next block
                $i += ($byte + 2);
                continue;
            }

            // if length is in the range 129 to 255,
            // the following single byte shall be copied 257 - length (2 to 128) times during decompression
            if ($i + 1 >= $data_length) {
                // truncated input: the byte to repeat is missing, nothing more to decode
                break;
            }
            $decoded .= str_repeat($data[$i + 1], 257 - $byte);
            // move to next block
            $i += 2;
        }

        return $decoded;
    }

    /**
     * @return array list of available filters
     */
    public function getAvailableFilters(): array
    {
        return $this->availableFilters;
    }
}


1		<?php
2
3		/**
4		* This file is based on code of tecnickcom/TCPDF PDF library.
5		*
6		* Original author Nicola Asuni ([email protected]) and
7		* contributors (https://github.com/tecnickcom/TCPDF/graphs/contributors).
8		*
9		* @see https://github.com/tecnickcom/TCPDF
10		*
11		* Original code was licensed on the terms of the LGPL v3.
12		*
13		* ------------------------------------------------------------------------------
14		*
15		* @file This file is part of the PdfParser library.
16		*
17		* @author Konrad Abicht <[email protected]>
18		*
19		* @date 2020-01-06
20		*
21		* @license LGPLv3
22		*
23		* @url <https://github.com/smalot/pdfparser>
24		*
25		* PdfParser is a pdf library written in PHP, extraction oriented.
26		* Copyright (C) 2017 - Sébastien MALOT <[email protected]>
27		*
28		* This program is free software: you can redistribute it and/or modify
29		* it under the terms of the GNU Lesser General Public License as published by
30		* the Free Software Foundation, either version 3 of the License, or
31		* (at your option) any later version.
32		*
33		* This program is distributed in the hope that it will be useful,
34		* but WITHOUT ANY WARRANTY; without even the implied warranty of
35		* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
36		* GNU Lesser General Public License for more details.
37		*
38		* You should have received a copy of the GNU Lesser General Public License
39		* along with this program.
40		* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
41		*/
42
43		namespace Smalot\PdfParser\RawData;
44
45		use Smalot\PdfParser\Exception\NotImplementedException;
46
47		class FilterHelper
48		{
49		protected $availableFilters = ['ASCIIHexDecode', 'ASCII85Decode', 'LZWDecode', 'FlateDecode', 'RunLengthDecode'];
50
51		/**
52		* Decode data using the specified filter type.
53		*
54		* @param string $filter Filter name
55		* @param string $data Data to decode
56		*
57		* @return string Decoded data string
58		*
59		* @throws \Exception
60		* @throws \Smalot\PdfParser\Exception\NotImplementedException if a certain decode function is not implemented yet
61		*/
62	81	public function decodeFilter(string $filter, string $data, int $decodeMemoryLimit = 0): string
63		{
64		switch ($filter) {
65	81	case 'ASCIIHexDecode':
66	1	return $this->decodeFilterASCIIHexDecode($data);
67
68	80	case 'ASCII85Decode':
69	4	return $this->decodeFilterASCII85Decode($data);
70
71	76	case 'LZWDecode':
72		return $this->decodeFilterLZWDecode($data);
73
74	76	case 'FlateDecode':
75	70	return $this->decodeFilterFlateDecode($data, $decodeMemoryLimit);
		0 ignored issues – show Bug Best Practice introduced 2023-07-31 17:33 UTC by Report Bug Copy Issue Report The expression `return $this->decodeFilt...ta, $decodeMemoryLimit)` could return the type `null` which is incompatible with the type-hinted return `string`. Consider adding an additional type-check to rule them out. Loading history...
76
77	6	case 'RunLengthDecode':
78		return $this->decodeFilterRunLengthDecode($data);
79
80	6	case 'CCITTFaxDecode':
81	1	throw new NotImplementedException('Decode CCITTFaxDecode not implemented yet.');
82	5	case 'JBIG2Decode':
83	1	throw new NotImplementedException('Decode JBIG2Decode not implemented yet.');
84	4	case 'DCTDecode':
85	1	throw new NotImplementedException('Decode DCTDecode not implemented yet.');
86	3	case 'JPXDecode':
87	1	throw new NotImplementedException('Decode JPXDecode not implemented yet.');
88	2	case 'Crypt':
89	1	throw new NotImplementedException('Decode Crypt not implemented yet.');
90		default:
91	1	return $data;
92		}
93		}
94
95		/**
96		* ASCIIHexDecode
97		*
98		* Decodes data encoded in an ASCII hexadecimal representation, reproducing the original binary data.
99		*
100		* @param string $data Data to decode
101		*
102		* @return string data string
103		*
104		* @throws \Exception
105		*/
106	1	protected function decodeFilterASCIIHexDecode(string $data): string
107		{
108		// all white-space characters shall be ignored
109	1	$data = preg_replace('/[\s]/', '', $data);
110		// check for EOD character: GREATER-THAN SIGN (3Eh)
111	1	$eod = strpos($data, '>');
112	1	if (false !== $eod) {
113		// remove EOD and extra data (if any)
114		$data = substr($data, 0, $eod);
115		$eod = true;
116		}
117		// get data length
118	1	$data_length = \strlen($data);
119	1	if (0 != ($data_length % 2)) {
120		// odd number of hexadecimal digits
121		if ($eod) {
122		// EOD shall behave as if a 0 (zero) followed the last digit
123		$data = substr($data, 0, -1).'0'.substr($data, -1);
124		} else {
125		throw new \Exception('decodeFilterASCIIHexDecode: invalid code');
126		}
127		}
128		// check for invalid characters
129	1	if (preg_match('/[^a-fA-F\d]/', $data) > 0) {
130		throw new \Exception('decodeFilterASCIIHexDecode: invalid code');
131		}
132		// get one byte of binary data for each pair of ASCII hexadecimal digits
133	1	$decoded = pack('H*', $data);
134
135	1	return $decoded;
136		}
137
138		/**
139		* ASCII85Decode
140		*
141		* Decodes data encoded in an ASCII base-85 representation, reproducing the original binary data.
142		*
143		* @param string $data Data to decode
144		*
145		* @return string data string
146		*
147		* @throws \Exception
148		*/
149	4	protected function decodeFilterASCII85Decode(string $data): string
150		{
151		// initialize string to return
152	4	$decoded = '';
153		// all white-space characters shall be ignored
154	4	$data = preg_replace('/[\s]/', '', $data);
155		// remove start sequence 2-character sequence <~ (3Ch)(7Eh)
156	4	if (0 === strpos($data, '<~')) {
157		// remove EOD and extra data (if any)
158	1	$data = substr($data, 2);
159		}
160		// check for EOD: 2-character sequence ~> (7Eh)(3Eh)
161	4	$eod = strpos($data, '~>');
162	4	if (\strlen($data) - 2 === $eod) {
163		// remove EOD and extra data (if any)
164	2	$data = substr($data, 0, $eod);
165		}
166		// data length
167	4	$data_length = \strlen($data);
168		// check for invalid characters
169	4	if (preg_match('/[^\x21-\x75,\x74]/', $data) > 0) {
170		throw new \Exception('decodeFilterASCII85Decode: invalid code');
171		}
172		// z sequence
173	4	$zseq = \chr(0).\chr(0).\chr(0).\chr(0);
174		// position inside a group of 4 bytes (0-3)
175	4	$group_pos = 0;
176	4	$tuple = 0;
177	4	$pow85 = [85 * 85 * 85 * 85, 85 * 85 * 85, 85 * 85, 85, 1];
178
179		// for each byte
180	4	for ($i = 0; $i < $data_length; ++$i) {
181		// get char value
182	4	$char = \ord($data[$i]);
183	4	if (122 == $char) { // 'z'
184		if (0 == $group_pos) {
185		$decoded .= $zseq;
186		} else {
187		throw new \Exception('decodeFilterASCII85Decode: invalid code');
188		}
189		} else {
190		// the value represented by a group of 5 characters should never be greater than 2^32 - 1
191	4	$tuple += (($char - 33) * $pow85[$group_pos]);
192	4	if (4 == $group_pos) {
193	4	// The following if-clauses are an attempt to fix/suppress the following deprecation warning:
194	4	// chr(): Providing a value not in-between 0 and 255 is deprecated, this is because a byte value
195	4	// must be in the [0, 255] interval. The value used will be constrained using % 256
196		// I know this is ugly and there might be more fancier ways. If you know one, feel free to provide a pull request.
197	4	if (255 < $tuple >> 8) {
198		$chr8Part = \chr(($tuple >> 8) % 256);
199		} else {
200		$chr8Part = \chr($tuple >> 8);
201	4	}
202	3
203		if (255 < $tuple >> 16) {
204		$chr16Part = \chr(($tuple >> 16) % 256);
205		} else {
206	4	$chr16Part = \chr($tuple >> 16);
207		}
208
209		if (255 < $tuple >> 24) {
210	4	$chr24Part = \chr(($tuple >> 24) % 256);
211		} else {
212		$chr24Part = \chr($tuple >> 24);
213		}
214	4
215	3	if (255 < $tuple) {
216	3	$chrTuple = \chr($tuple % 256);
217		} else {
218	1	$chrTuple = \chr($tuple);
219		}
220
221		$decoded .= $chr24Part . $chr16Part . $chr8Part . $chrTuple;
222	4	$tuple = 0;
223		$group_pos = 0;
224		} else {
225		++$group_pos;
226		}
227		}
228		}
229		if ($group_pos > 1) {
230		$tuple += $pow85[$group_pos - 1];
231		}
232		// last tuple (if any)
233		switch ($group_pos) {
234		case 4:
235		$decoded .= \chr($tuple >> 24).\chr($tuple >> 16).\chr($tuple >> 8);
236		break;
237	70
238		case 3:
239		$decoded .= \chr($tuple >> 24).\chr($tuple >> 16);
240		break;
241
242	70	case 2:
243		$decoded .= \chr($tuple >> 24);
244	70	break;
245
246		case 1:
247		throw new \Exception('decodeFilterASCII85Decode: invalid code');
248		}
249	5
250	5	return $decoded;
251	5	}
252	5
253	5	/**
254	5	* FlateDecode
255		*
256		* Decompresses data encoded using the zlib/deflate compression method, reproducing the original text or binary data.
257		*
258	5	* @param string $data Data to decode
259		* @param int $decodeMemoryLimit Memory limit on deflation
260		*
261		* @return string data string
262	70	*
263		* @throws \Exception
264	4	*/
265		protected function decodeFilterFlateDecode(string $data, int $decodeMemoryLimit): ?string
266		{
267	66	// Uncatchable E_WARNING for "data error" is @ suppressed
268		// so execution may proceed with an alternate decompression
269		// method.
270		$decoded = @gzuncompress($data, $decodeMemoryLimit);
271
272		if (false === $decoded) {
273		// If gzuncompress() failed, try again using the compress.zlib://
274		// wrapper to decode it in a file-based context.
275		// See: https://www.php.net/manual/en/function.gzuncompress.php#79042
276		// Issue: https://github.com/smalot/pdfparser/issues/592
277		$ztmp = tmpfile();
278		if (false != $ztmp) {
279		fwrite($ztmp, "\x1f\x8b\x08\x00\x00\x00\x00\x00".$data);
280		$file = stream_get_meta_data($ztmp)['uri'];
281		if (0 === $decodeMemoryLimit) {
282		$decoded = file_get_contents('compress.zlib://'.$file);
283		} else {
284		$decoded = file_get_contents('compress.zlib://'.$file, false, null, 0, $decodeMemoryLimit);
285		}
286		fclose($ztmp);
287		}
288		}
289
290		if (false === \is_string($decoded) \|\| '' === $decoded) {
291		// If the decoded string is empty, that means decoding failed.
292		throw new \Exception('decodeFilterFlateDecode: invalid data');
293		}
294
295		return $decoded;
		0 ignored issues – show Bug Best Practice introduced 2023-08-02 14:20 UTC by Report Bug Copy Issue Report The expression `return $decoded` returns the type `false` which is incompatible with the type-hinted return `null\|string`. Loading history...
296		}
297
298		/**
299		* LZWDecode
300		*
301		* Decompresses data encoded using the LZW (Lempel-Ziv-Welch) adaptive compression method, reproducing the original text or binary data.
302		*
303		* @param string $data Data to decode
304		*
305		* @return string Data string
306		*/
307		protected function decodeFilterLZWDecode(string $data): string
308		{
309		// initialize string to return
310		$decoded = '';
311		// data length
312		$data_length = \strlen($data);
313		// convert string to binary string
314		$bitstring = '';
315		for ($i = 0; $i < $data_length; ++$i) {
316		$bitstring .= \sprintf('%08b', \ord($data[$i]));
317		}
318		// get the number of bits
319		$data_length = \strlen($bitstring);
320		// initialize code length in bits
321		$bitlen = 9;
322		// initialize dictionary index
323		$dix = 258;
324		// initialize the dictionary (with the first 256 entries).
325		$dictionary = [];
326		for ($i = 0; $i < 256; ++$i) {
327		$dictionary[$i] = \chr($i);
328		}
329		// previous val
330		$prev_index = 0;
331		// while we encounter EOD marker (257), read code_length bits
332		while (($data_length > 0) && (257 != ($index = bindec(substr($bitstring, 0, $bitlen))))) {
333		// remove read bits from string
334		$bitstring = substr($bitstring, $bitlen);
335		// update number of bits
336		$data_length -= $bitlen;
337		if (256 == $index) { // clear-table marker
338		// reset code length in bits
339		$bitlen = 9;
340		// reset dictionary index
341		$dix = 258;
342		$prev_index = 256;
343		// reset the dictionary (with the first 256 entries).
344		$dictionary = [];
345		for ($i = 0; $i < 256; ++$i) {
346		$dictionary[$i] = \chr($i);
347		}
348		} elseif (256 == $prev_index) {
349		// first entry
350		$decoded .= $dictionary[$index];
351		$prev_index = $index;
352		} else {
353		// check if index exist in the dictionary
354		if ($index < $dix) {
355		// index exist on dictionary
356		$decoded .= $dictionary[$index];
357		$dic_val = $dictionary[$prev_index].$dictionary[$index][0];
358		// store current index
359		$prev_index = $index;
360		} else {
361		// index do not exist on dictionary
362		$dic_val = $dictionary[$prev_index].$dictionary[$prev_index][0];
363		$decoded .= $dic_val;
364		}
365		// update dictionary
366		$dictionary[$dix] = $dic_val;
367		++$dix;
368		// change bit length by case
369		if (2047 == $dix) {
370		$bitlen = 12;
371		} elseif (1023 == $dix) {
372		$bitlen = 11;
373		} elseif (511 == $dix) {
374		$bitlen = 10;
375		}
376		}
377		}
378
379		return $decoded;
380		}
381
382		/**
383		* RunLengthDecode
384		*
385		* Decompresses data encoded using a byte-oriented run-length encoding algorithm.
386		*
387		* @param string $data Data to decode
388		*/
389		protected function decodeFilterRunLengthDecode(string $data): string
390		{
391		// initialize string to return
392		$decoded = '';
393		// data length
394		$data_length = \strlen($data);
395	67	$i = 0;
396		while ($i < $data_length) {
397	67	// get current byte value
398		$byte = \ord($data[$i]);
399		if (128 == $byte) {
400		// a length value of 128 denote EOD
401		break;
402		} elseif ($byte < 128) {
403		// if the length byte is in the range 0 to 127
404		// the following length + 1 (1 to 128) bytes shall be copied literally during decompression
405		$decoded .= substr($data, $i + 1, $byte + 1);
406		// move to next block
407		$i += ($byte + 2);
408		} else {
409		// if length is in the range 129 to 255,
410		// the following single byte shall be copied 257 - length (2 to 128) times during decompression
411		$decoded .= str_repeat($data[$i + 1], 257 - $byte);
412		// move to next block
413		$i += 2;
414		}
415		}
416
417		return $decoded;
418		}
419
420		/**
421		* @return array list of available filters
422		*/
423		public function getAvailableFilters(): array
424		{
425		return $this->availableFilters;
426		}
427		}
428

smalot / pdfparser

FilterHelper C last analyzed 2026-01-08 08:18 UTC

Complexity

Size/Duplication

Test Coverage

Importance

7 Methods

How to fix Complexity

Complex Class

Duplication Side-by-Side

Filter issues like

FilterHelper C
last analyzed 2026-01-08 08:18 UTC