Test Failed
Pull Request — master (#560)
by
unknown
07:05
created

Parser::parseHeaderElement()   D

Complexity

Conditions 18
Paths 17

Size

Total Lines 57
Code Lines 37

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 35
CRAP Score 18.0513

Importance

Changes 3
Bugs 1 Features 0
Metric Value
cc 18
eloc 37
c 3
b 1
f 0
nc 17
nop 3
dl 0
loc 57
ccs 35
cts 37
cp 0.9459
crap 18.0513
rs 4.8666

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
/**
4
 * @file
5
 *          This file is part of the PdfParser library.
6
 *
7
 * @author  Sébastien MALOT <[email protected]>
8
 *
9
 * @date    2017-01-03
10
 *
11
 * @license LGPLv3
12
 *
13
 * @url     <https://github.com/smalot/pdfparser>
14
 *
15
 *  PdfParser is a pdf library written in PHP, extraction oriented.
16
 *  Copyright (C) 2017 - Sébastien MALOT <[email protected]>
17
 *
18
 *  This program is free software: you can redistribute it and/or modify
19
 *  it under the terms of the GNU Lesser General Public License as published by
20
 *  the Free Software Foundation, either version 3 of the License, or
21
 *  (at your option) any later version.
22
 *
23
 *  This program is distributed in the hope that it will be useful,
24
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
25
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
26
 *  GNU Lesser General Public License for more details.
27
 *
28
 *  You should have received a copy of the GNU Lesser General Public License
29
 *  along with this program.
30
 *  If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
31
 */
32
33
namespace Smalot\PdfParser;
34
35
use Smalot\PdfParser\Element\ElementArray;
36
use Smalot\PdfParser\Element\ElementBoolean;
37
use Smalot\PdfParser\Element\ElementDate;
38
use Smalot\PdfParser\Element\ElementHexa;
39
use Smalot\PdfParser\Element\ElementName;
40
use Smalot\PdfParser\Element\ElementNull;
41
use Smalot\PdfParser\Element\ElementNumeric;
42
use Smalot\PdfParser\Element\ElementString;
43
use Smalot\PdfParser\Element\ElementXRef;
44
use Smalot\PdfParser\RawData\RawDataParser;
45
46
/**
47
 * Class Parser
48
 */
49
class Parser
50
{
51
    /**
52
     * @var Config configuration handed to the raw data parser and to every PDFObject created by this parser
53
     */
54
    private $config;
55
56
    /**
57
     * @var PDFObject[] objects collected while parsing, keyed by object id; reset at the start of parseContent()
58
     */
59
    protected $objects = [];
60
61
    /**
     * @var RawDataParser low-level parser instantiated by the constructor
     */
    protected $rawDataParser;
62
63 41
    /**
     * Builds the parser together with the raw data parser it relies on.
     *
     * @param array       $cfg    settings forwarded unchanged to RawDataParser (defaults to an empty array)
     * @param Config|null $config configuration to use; a fresh Config is created when none is given
     */
    public function __construct($cfg = [], ?Config $config = null)
    {
        if (!$config) {
            $config = new Config();
        }

        $this->config = $config;
        $this->rawDataParser = new RawDataParser($cfg, $config);
    }
68
69 1
    /**
     * Returns the configuration object held by this parser.
     */
    public function getConfig(): Config
    {
        return $this->config;
    }
73
74
    /**
     * Reads a PDF file from disk and parses its content.
     *
     * The error control operator (@) is deliberately not used on the read:
     * users repeatedly complained that this method died silently, so read
     * problems are surfaced instead of being hidden.
     *
     * @see https://github.com/smalot/pdfparser/issues/204
     *
     * @param string $filename path of the PDF file to read
     *
     * @throws \Exception if the file cannot be read
     * @throws \Exception if parseContent() rejects the content
     */
    public function parseFile(string $filename): Document
    {
        $content = file_get_contents($filename);

        // file_get_contents() returns false on failure. Passing that value on
        // to parseContent() would hide the real cause of the problem, so stop
        // here with a message that names the file.
        if (false === $content) {
            throw new \Exception(sprintf('Unable to read file "%s".', $filename));
        }

        return $this->parseContent($content);
    }
92
93
    /**
     * Parses raw PDF content and builds the corresponding Document.
     *
     * @param string $content PDF content to parse
     *
     * @throws \Exception if secured PDF file was detected
     * @throws \Exception if no object list was found
     */
    public function parseContent(string $content): Document
    {
        // The raw data parser yields the cross-reference structure and the raw object list.
        [$xref, $data] = $this->rawDataParser->parseData($content);

        if (isset($xref['trailer']['encrypt'])) {
            throw new \Exception('Secured pdf file are currently not supported.');
        }

        if (!$data) {
            throw new \Exception('Object list not found. Possible secured file.');
        }

        $document = new Document();
        $this->objects = [];

        foreach ($data as $objectId => $rawObject) {
            $this->parseObject($objectId, $rawObject, $document);

            // Drop each raw entry as soon as it has been converted.
            unset($data[$objectId]);
        }

        $trailer = $this->parseTrailer($xref['trailer'], $document);
        $document->setTrailer($trailer);
        $document->setObjects($this->objects);

        return $document;
    }
126
127 38
    /**
     * Converts the raw trailer structure into a Header of typed elements.
     *
     * The first letter of every entry name is upper-cased. Numeric values
     * become ElementNumeric, nested arrays are converted recursively (without
     * a document) and wrapped in an ElementArray, values containing an
     * underscore become ElementXRef, and everything else is handed to
     * parseHeaderElement() as a '(' value.
     *
     * @param array         $structure raw trailer entries, keyed by name
     * @param Document|null $document  document passed to the created elements
     *
     * @return Header
     */
    protected function parseTrailer(array $structure, ?Document $document)
    {
        $elements = [];

        foreach ($structure as $key => $raw) {
            $key = ucfirst($key);

            if (is_numeric($raw)) {
                $elements[$key] = new ElementNumeric($raw);
                continue;
            }

            if (\is_array($raw)) {
                $nested = $this->parseTrailer($raw, null);
                $elements[$key] = new ElementArray($nested, null);
                continue;
            }

            $elements[$key] = false !== strpos($raw, '_')
                ? new ElementXRef($raw, $document)
                : $this->parseHeaderElement('(', $raw, $document);
        }

        return new Header($elements, $document);
    }
148
149 39
    /**
     * Converts one raw object structure into a PDFObject and registers it.
     *
     * The parts of the structure are visited in order: an array ('[') or
     * dictionary ('<<') part replaces the current header, a 'stream' part
     * supplies the content, and any other part that yields an element becomes
     * a one-element header. A stream whose header type equals "ObjStm" is
     * expanded into the objects it embeds; nothing is registered under $id
     * in that case.
     *
     * @param string        $id        identifier the resulting object is stored under
     * @param array         $structure raw parts of the object
     * @param Document|null $document  document passed to the created headers and objects
     */
    protected function parseObject(string $id, array $structure, ?Document $document)
    {
        $header = new Header([], $document);
        $content = '';

        foreach ($structure as $part) {
            // Integer parts carry no type/value pair; handle them as an empty part.
            if (\is_int($part)) {
                $part = [null, null];
            }

            switch ($part[0]) {
                case '[':
                    $items = [];
                    foreach ($part[1] as $item) {
                        $items[] = $this->parseHeaderElement($item[0], $item[1], $document);
                    }
                    $header = new Header($items, $document);
                    break;

                case '<<':
                    $header = $this->parseHeader($part[1], $document);
                    break;

                case 'stream':
                    $content = $part[3][0] ?? $part[1];

                    if ($header->get('Type')->equals('ObjStm')) {
                        $this->registerObjectStream($content, $document);

                        // It is not necessary to store this content.
                        return;
                    }
                    break;

                default:
                    if ('null' != $part) {
                        $element = $this->parseHeaderElement($part[0], $part[1], $document);

                        if ($element) {
                            $header = new Header([$element], $document);
                        }
                    }
                    break;
            }
        }

        if (isset($this->objects[$id])) {
            return;
        }

        $this->objects[$id] = PDFObject::factory($document, $header, $content, $this->config);
    }

    /**
     * Splits the content of an object stream into its embedded objects and registers each of them.
     *
     * The content starts with a run of number pairs followed by the object
     * data. In every pair the first number is used as the object number and
     * the second one as the offset of that object inside the remaining data.
     * Each object is registered under "<number>_0".
     *
     * @param string        $content  stream content taken from the 'stream' part
     * @param Document|null $document document passed to the created headers and objects
     */
    private function registerObjectStream($content, ?Document $document): void
    {
        $match = [];

        // Split xrefs and contents.
        preg_match('/^((\d+\s+\d+\s*)*)(.*)$/s', $content, $match);
        $body = $match[3];

        // Extract xrefs.
        $pairs = preg_split(
            '/(\d+\s+\d+\s*)/s',
            $match[1],
            -1,
            \PREG_SPLIT_NO_EMPTY | \PREG_SPLIT_DELIM_CAPTURE
        );

        $numberByOffset = [];
        foreach ($pairs as $pair) {
            [$number, $offset] = preg_split("/\s+/", trim($pair));
            $numberByOffset[$offset] = $number;
        }

        // Order the objects by offset so that each one ends where the next one starts.
        ksort($numberByOffset);

        $numbers = array_values($numberByOffset);
        $offsets = array_keys($numberByOffset);

        foreach ($offsets as $index => $offset) {
            $end = $offsets[$index + 1] ?? \strlen($body);
            $chunk = substr($body, $offset, (int) $end - (int) $offset);

            $subHeader = Header::parse($chunk, $document);
            $this->objects[$numbers[$index].'_0'] = PDFObject::factory($document, $subHeader, '', $this->config);
        }
    }
236
237
    /**
     * Builds a Header from a flat list of alternating name and value tokens.
     *
     * Tokens are consumed in pairs: the second field of the first token is
     * the entry name, and the first two fields of the token that follows are
     * the type and the raw value of that entry.
     *
     * @param array         $structure flat token list (name, value, name, value, ...)
     * @param Document|null $document  document passed to the created elements
     *
     * @throws \Exception
     */
    protected function parseHeader(array $structure, ?Document $document): Header
    {
        $entries = [];
        $total = \count($structure);
        $index = 0;

        while ($index < $total) {
            $key = $structure[$index][1];
            $valueType = $structure[$index + 1][0];
            $rawValue = $structure[$index + 1][1];

            $entries[$key] = $this->parseHeaderElement($valueType, $rawValue, $document);

            $index += 2;
        }

        return new Header($entries, $document);
    }
255
256
    /**
     * Converts one raw (type, value) pair into the matching element object.
     *
     * @param string|null   $type     type marker of the raw value ('<<', 'numeric', '(', '[', ...)
     * @param string|array  $value    raw value belonging to the marker
     * @param Document|null $document document passed to the created elements
     *
     * @return Element|Header|null null for the markers 'endstream', 'obj' and '' (a null type also ends up there)
     *
     * @throws \Exception if the type marker is not recognised
     */
    protected function parseHeaderElement(?string $type, $value, ?Document $document)
    {
        // A dictionary without content must still reach parseHeader() as an array.
        if (('<<' === $type || '>>' === $type) && empty($value)) {
            $value = [];
        }

        // switch compares loosely, which is what routes a null type to the '' case below.
        switch ($type) {
            case '<<':
            case '>>':
                $header = $this->parseHeader($value, $document);
                // NOTE(review): the returned object is not used here; presumably
                // the call is kept for a side effect of the factory - confirm.
                PDFObject::factory($document, $header, null, $this->config);

                return $header;

            case 'numeric':
                return new ElementNumeric($value);

            case 'boolean':
                return new ElementBoolean($value);

            case 'null':
                return new ElementNull();

            case '(':
                // Try a date first and fall back to a plain string.
                if ($date = ElementDate::parse('('.$value.')', $document)) {
                    return $date;
                }

                return ElementString::parse('('.$value.')', $document);

            case '<':
                // Hexadecimal strings are decoded and then handled like '(' values.
                return $this->parseHeaderElement('(', ElementHexa::decode($value), $document);

            case '/':
                return ElementName::parse('/'.$value, $document);

            case 'ojbref': // old mistake in tcpdf parser
            case 'objref':
                return new ElementXRef($value, $document);

            case '[':
                $values = [];

                // A non-array value results in an empty ElementArray.
                if (\is_array($value)) {
                    foreach ($value as $sub_element) {
                        $values[] = $this->parseHeaderElement($sub_element[0], $sub_element[1], $document);
                    }
                }

                return new ElementArray($values, $document);

            case 'endstream':
            case 'obj': // accepted as a workaround by the original author; its meaning is not documented
            case '':
                // Nothing to do with.
                return null;

            default:
                throw new \Exception('Invalid type: "'.$type.'".');
        }
    }
327
}
328