Passed
Push — issue-398 ( bfe6ba )
by Sebastien
02:43
created

PDFObject   F

Complexity

Total Complexity 152

Size/Duplication

Total Lines 765
Duplicated Lines 0 %

Test Coverage

Coverage 89.92%

Importance

Changes 2
Bugs 2 Features 0
Metric Value
eloc 398
c 2
b 2
f 0
dl 0
loc 765
ccs 357
cts 397
cp 0.8992
rs 2
wmc 152

15 Methods

Rating   Name   Duplication   Size   Complexity  
A getContent() 0 3 1
A getHeader() 0 3 1
B cleanContent() 0 57 11
A has() 0 3 1
A init() 0 2 1
A getDefaultFont() 0 14 3
A __construct() 0 10 2
A get() 0 3 1
A getDetails() 0 3 1
A getSectionsText() 0 35 6
A getUniqueId() 0 3 1
F getCommandsText() 0 149 27
D getTextArray() 0 118 35
F getText() 0 195 52
B factory() 0 39 9

How to fix   Complexity   

Complex Class

Complex classes like PDFObject often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use PDFObject, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
/**
4
 * @file
5
 *          This file is part of the PdfParser library.
6
 *
7
 * @author  Sébastien MALOT <[email protected]>
8
 * @date    2017-01-03
9
 *
10
 * @license LGPLv3
11
 * @url     <https://github.com/smalot/pdfparser>
12
 *
13
 *  PdfParser is a pdf library written in PHP, extraction oriented.
14
 *  Copyright (C) 2017 - Sébastien MALOT <[email protected]>
15
 *
16
 *  This program is free software: you can redistribute it and/or modify
17
 *  it under the terms of the GNU Lesser General Public License as published by
18
 *  the Free Software Foundation, either version 3 of the License, or
19
 *  (at your option) any later version.
20
 *
21
 *  This program is distributed in the hope that it will be useful,
22
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
23
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
24
 *  GNU Lesser General Public License for more details.
25
 *
26
 *  You should have received a copy of the GNU Lesser General Public License
27
 *  along with this program.
28
 *  If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
29
 */
30
31
namespace Smalot\PdfParser;
32
33
use Smalot\PdfParser\XObject\Form;
34
use Smalot\PdfParser\XObject\Image;
35
36
/**
37
 * Class PDFObject
38
 */
39
class PDFObject
40
{
41
    const TYPE = 't';
42
43
    const OPERATOR = 'o';
44
45
    const COMMAND = 'c';
46
47
    /**
48
     * The recursion stack.
49
     *
50
     * @var array
51
     */
52
    public static $recursionStack = [];
53
54
    /**
55
     * @var Document
56
     */
57
    protected $document = null;
58
59
    /**
60
     * @var Header
61
     */
62
    protected $header = null;
63
64
    /**
65
     * @var string
66
     */
67
    protected $content = null;
68
69
    /**
70
     * @var Config
71
     */
72
    protected $config;
73
74
    /**
     * Stores the owning document, header, raw content and configuration.
     *
     * @param Document    $document document kept on the instance
     * @param Header|null $header   header of the object; a new empty Header is used when null is given
     * @param string|null $content  raw content kept on the instance
     * @param Config|null $config   configuration kept on the instance as-is
     */
    public function __construct(
        Document $document,
        Header $header = null,
        $content = null,
        Config $config = null
    ) {
        // Guarantee that a header object is always available after construction.
        if (null === $header) {
            $header = new Header();
        }

        $this->document = $document;
        $this->header = $header;
        $this->content = $content;
        $this->config = $config;
    }
90
91 35
    /**
     * Initialization step; this implementation performs no work.
     */
    public function init()
    {
        // Nothing to initialize in this class.
    }
94
95
    /**
     * Gives access to the header stored on this object.
     *
     * @return Header|null
     */
    public function getHeader()
    {
        $header = $this->header;

        return $header;
    }
102
103
    /**
     * Looks up a header entry by name; the lookup is delegated to the header.
     *
     * @param string $name name of the header entry
     *
     * @return Element|PDFObject
     */
    public function get($name)
    {
        $header = $this->header;

        return $header->get($name);
    }
112
113
    /**
     * Tells whether the header knows an entry with the given name; the check
     * is delegated to the header.
     *
     * @param string $name name of the header entry
     *
     * @return bool
     */
    public function has($name)
    {
        $header = $this->header;

        return $header->has($name);
    }
122
123
    /**
     * Returns the details reported by the header.
     *
     * @param bool $deep forwarded unchanged to the header's getDetails()
     *
     * @return array
     */
    public function getDetails($deep = true)
    {
        $header = $this->header;

        return $header->getDetails($deep);
    }
132
133
    /**
     * Gives access to the raw content stored on this object.
     *
     * @return string|null
     */
    public function getContent()
    {
        $content = $this->content;

        return $content;
    }
140
141
    /**
     * Masks the parts of a content stream that are data rather than operators.
     *
     * Every masked byte is replaced by exactly one mask byte, so the result has
     * the same length as the input and offsets found in the masked string can
     * be applied to the original content.
     *
     * Masked, in this order: escaped backslashes and parentheses, inline image
     * blocks (BI ... ID ... EI), the inside of [...] arrays, the inside of
     * (...) strings, everything from "<" to ">" (delimiters included), and the
     * BDC / EMC marked-content operators.
     *
     * @param string $content raw content to mask
     * @param string $char    mask character; only its first byte is used, and 'X' is used when it is empty
     *
     * @return string masked content
     */
    public function cleanContent($content, $char = 'X')
    {
        // The mask has to be exactly one byte long, otherwise offsets would shift.
        $char = (string) $char;
        $char = '' === $char ? 'X' : $char[0];

        // Replaces the requested capture group of every match by mask bytes.
        // The subject is returned untouched when the pattern does not match or
        // fails, so no stale match data can ever be applied.
        $mask = static function ($pattern, $group, $subject) use ($char) {
            $found = [];
            if (!preg_match_all($pattern, $subject, $found, \PREG_OFFSET_CAPTURE)) {
                return $subject;
            }

            foreach ($found[$group] as $part) {
                $length = \strlen($part[0]);
                $subject = substr_replace($subject, str_repeat($char, $length), $part[1], $length);
            }

            return $subject;
        };

        // Escaped backslashes and parentheses (two bytes each) become two mask bytes.
        $content = str_replace(['\\\\', '\\)', '\\('], $char.$char, $content);

        // Remove image bloc with binary content (whole match, surrounding white space included).
        $content = $mask('/\s(BI\s.*?(\sID\s).*?(\sEI))\s/s', 0, $content);

        // Clean content in square brackets [.....]
        $content = $mask('/\[((\(.*?\)|[0-9\.\-\s]*)*)\]/s', 1, $content);

        // Clean content in round brackets (.....)
        $content = $mask('/\((.*?)\)/s', 1, $content);

        // Clean structure: anything nested inside < ... > is masked, delimiters included.
        $parts = preg_split('/(<|>)/s', $content, -1, \PREG_SPLIT_NO_EMPTY | \PREG_SPLIT_DELIM_CAPTURE);
        if ($parts) {
            $content = '';
            $level = 0;
            foreach ($parts as $part) {
                if ('<' === $part) {
                    ++$level;
                }

                $content .= (0 === $level ? $part : str_repeat($char, \strlen($part)));

                if ('>' === $part) {
                    --$level;
                }
            }
        }

        // Clean BDC and EMC markup. The mask character is quoted together with
        // the "/" delimiter so that any mask character yields a valid pattern.
        $content = $mask('/(\/[A-Za-z0-9\_]*\s*'.preg_quote($char, '/').'*BDC)/s', 1, $content);
        $content = $mask('/\s(EMC)\s/s', 1, $content);

        return $content;
    }
202
203
    /**
     * Splits a content stream into the sections that can produce text.
     *
     * The content is padded with one space on each side and masked with
     * cleanContent() so that string data cannot be mistaken for operators.
     * Matches are located in the masked copy and cut out of the padded
     * original at the same offsets. All BT ... ET text blocks come first,
     * followed by all "/Name Do" commands.
     *
     * @param string $content raw content stream
     *
     * @return array list of section strings
     */
    public function getSectionsText($content)
    {
        $padded = ' '.$content.' ';
        $masked = $this->cleanContent($padded, '_');
        $sections = [];
        $found = [];

        // Extract text blocks.
        if (preg_match_all('/\s+BT[\s|\(|\[]+(.*?)\s*ET/s', $masked, $found, \PREG_OFFSET_CAPTURE)) {
            foreach ($found[1] as list($maskedText, $position)) {
                if ('' !== $maskedText) {
                    $raw = substr($padded, $position, \strlen($maskedText)).' ';

                    // Removes BDC and EMC markup.
                    $sections[] = preg_replace('/(\/[A-Za-z0-9]+\s*<<.*?)(>>\s*BDC)(.*?)(EMC\s+)/s', '${3}', $raw);
                }
            }
        }

        // Extract 'do' commands.
        if (preg_match_all('/(\/[A-Za-z0-9\.\-_]+\s+Do)\s/s', $masked, $found, \PREG_OFFSET_CAPTURE)) {
            foreach ($found[1] as list($maskedText, $position)) {
                $sections[] = substr($padded, $position, \strlen($maskedText));
            }
        }

        return $sections;
    }
244
245 12
    /**
     * Picks the font used before any Tf operator selects one.
     *
     * The page fonts (when a page is given) are followed by the document
     * fonts; the first entry of that combined list is returned. A new Font
     * bound to the document is returned when the list is empty.
     *
     * @param Page|null $page page whose fonts take precedence, if any
     *
     * @return Font
     */
    private function getDefaultFont(Page $page = null)
    {
        $candidates = null !== $page ? $page->getFonts() : [];
        $candidates = array_merge($candidates, array_values($this->document->getFonts()));

        if (0 === \count($candidates)) {
            return new Font($this->document);
        }

        return reset($candidates);
    }
260
261
    /**
     * Extracts the text of this object as a single string.
     *
     * The content is split into sections, each section is parsed into
     * commands, and the text-related operators are interpreted: positioning
     * operators add separators (space, tab, new line), Tf switches the current
     * font, Tj / ' / TJ decode text with the current font, and Do recurses
     * into the referenced XObject of the page. One space is always appended to
     * the result.
     *
     * The id of this object is pushed on the static recursion stack for the
     * duration of the call so that circular XObject references are skipped.
     * The id is removed again even when decoding throws.
     *
     * @param Page|null $page page used to resolve fonts and XObjects; without it Tf and Do are ignored
     *
     * @return string
     *
     * @throws \Exception
     */
    public function getText(Page $page = null)
    {
        $result = '';
        $sections = $this->getSectionsText($this->content);
        $current_font = $this->getDefaultFont($page);

        $current_position_td = ['x' => false, 'y' => false];
        $current_position_tm = ['x' => false, 'y' => false];

        self::$recursionStack[] = $this->getUniqueId();

        try {
            foreach ($sections as $section) {
                $commands = $this->getCommandsText($section);
                $reverse_text = false;
                $text = '';

                foreach ($commands as $command) {
                    switch ($command[self::OPERATOR]) {
                        case 'BMC':
                            if ('ReversedChars' == $command[self::COMMAND]) {
                                $reverse_text = true;
                            }
                            break;

                        // move text current point
                        case 'Td':
                            $args = preg_split('/\s/s', $command[self::COMMAND]);
                            $y = array_pop($args);
                            $x = array_pop($args);
                            if (((float) $x <= 0) ||
                                (false !== $current_position_td['y'] && (float) $y < (float) ($current_position_td['y']))
                            ) {
                                // vertical offset
                                $text .= "\n";
                            } elseif (false !== $current_position_td['x']
                                && (float) $x > (float) ($current_position_td['x'])
                            ) {
                                // horizontal offset
                                $text .= ' ';
                            }
                            $current_position_td = ['x' => $x, 'y' => $y];
                            break;

                        // move text current point and set leading
                        case 'TD':
                            $args = preg_split('/\s/s', $command[self::COMMAND]);
                            $y = array_pop($args);
                            $x = array_pop($args);
                            if ((float) $y < 0) {
                                $text .= "\n";
                            } elseif ((float) $x <= 0) {
                                $text .= ' ';
                            }
                            break;

                        case 'Tf':
                            list($id) = preg_split('/\s/s', $command[self::COMMAND]);
                            $id = trim($id, '/');
                            if (null !== $page) {
                                $new_font = $page->getFont($id);
                                // If an invalid font ID is given, do not update the font.
                                // This should theoretically never happen, as the PDF spec states for the Tf operator:
                                // "The specified font value shall match a resource name in the Font entry of the default resource dictionary"
                                // (https://www.adobe.com/content/dam/acom/en/devnet/pdf/pdfs/PDF32000_2008.pdf, page 435)
                                // But we want to make sure that malformed PDFs do not simply crash.
                                if (null !== $new_font) {
                                    $current_font = $new_font;
                                }
                            }
                            break;

                        case "'":
                        case 'Tj':
                            // Wrap the single command so it has the same shape as a TJ array.
                            $command[self::COMMAND] = [$command];
                            // no break
                        case 'TJ':
                            $sub_text = $current_font->decodeText($command[self::COMMAND]);
                            $text .= $sub_text;
                            break;

                        // set leading
                        case 'TL':
                            $text .= ' ';
                            break;

                        case 'Tm':
                            $args = preg_split('/\s/s', $command[self::COMMAND]);
                            $y = array_pop($args);
                            $x = array_pop($args);
                            if (false !== $current_position_tm['x']) {
                                $delta = abs((float) $x - (float) ($current_position_tm['x']));
                                if ($delta > 10) {
                                    $text .= "\t";
                                }
                            }
                            if (false !== $current_position_tm['y']) {
                                $delta = abs((float) $y - (float) ($current_position_tm['y']));
                                if ($delta > 10) {
                                    $text .= "\n";
                                }
                            }
                            $current_position_tm = ['x' => $x, 'y' => $y];
                            break;

                        // set horizontal scaling
                        // NOTE(review): a new line is emitted here; kept as-is to preserve the output.
                        case 'Tz':
                            $text .= "\n";
                            break;

                        // move to start of next line
                        case 'T*':
                            $text .= "\n";
                            break;

                        case 'Do':
                            if (null !== $page) {
                                $args = preg_split('/\s/s', $command[self::COMMAND]);
                                $id = trim(array_pop($args), '/ ');
                                $xobject = $page->getXObject($id);

                                // @todo $xobject could be a ElementXRef object, which would then throw an error
                                // Strict comparison: ids are hash strings, and loosely
                                // compared numeric-looking strings could match by accident.
                                if (\is_object($xobject)
                                    && $xobject instanceof self
                                    && !\in_array($xobject->getUniqueId(), self::$recursionStack, true)
                                ) {
                                    // Not a circular reference.
                                    $text .= $xobject->getText($page);
                                }
                            }
                            break;

                        // Operators below are recognized but never contribute text.
                        case 'Tc': // set character spacing
                        case 'Ts': // set super/subscripting text rise
                        case 'Tw': // set word spacing
                        case 'Da':
                        case 'rg':
                        case 'RG':
                        case 're':
                        case 'co':
                        case 'cs':
                        case 'gs':
                        case 'en':
                        case 'sc':
                        case 'SC':
                        case 'g':
                        case 'G':
                        case 'V':
                        case 'vo':
                        case 'Vo':
                            break;

                        default:
                    }
                }

                // Fix Hebrew and other reverse text oriented languages.
                // @see: https://github.com/smalot/pdfparser/issues/398
                if ($reverse_text) {
                    $chars = mb_str_split($text, 1, mb_internal_encoding());
                    $text = implode('', array_reverse($chars));
                }

                $result .= $text;
            }
        } finally {
            // Always release the recursion marker, even when decoding throws,
            // so that a stale id cannot block later extractions.
            array_pop(self::$recursionStack);
        }

        return $result.' ';
    }
464
465
    /**
     * Extracts the text of this object as a list of decoded fragments.
     *
     * One entry is added per text-showing operator (Tj, ' and TJ), decoded
     * with the current font, and one entry per Do operator whose XObject can
     * be resolved through the page. Positioning operators are ignored, so no
     * separators are produced.
     *
     * @param Page|null $page page used to resolve fonts and XObjects; without it Tf and Do are ignored
     *
     * @return array
     *
     * @throws \Exception
     */
    public function getTextArray(Page $page = null)
    {
        $text = [];
        $sections = $this->getSectionsText($this->content);
        $current_font = new Font($this->document);

        foreach ($sections as $section) {
            $commands = $this->getCommandsText($section);

            foreach ($commands as $command) {
                switch ($command[self::OPERATOR]) {
                    case 'Tf':
                        if (null !== $page) {
                            list($id) = preg_split('/\s/s', $command[self::COMMAND]);
                            $id = trim($id, '/');
                            $new_font = $page->getFont($id);
                            // Keep the previous font when the id is unknown, as getText()
                            // does, so that malformed PDFs do not fail on a null font.
                            if (null !== $new_font) {
                                $current_font = $new_font;
                            }
                        }
                        break;

                    case "'":
                    case 'Tj':
                        // Wrap the single command so it has the same shape as a TJ array.
                        $command[self::COMMAND] = [$command];
                        // no break
                    case 'TJ':
                        $sub_text = $current_font->decodeText($command[self::COMMAND]);
                        $text[] = $sub_text;
                        break;

                    case 'Do':
                        if (null !== $page) {
                            $args = preg_split('/\s/s', $command[self::COMMAND]);
                            $id = trim(array_pop($args), '/ ');
                            $xobject = $page->getXObject($id);
                            // Same type guard as getText(): only PDFObject instances
                            // are asked for their text.
                            if ($xobject instanceof self) {
                                $text[] = $xobject->getText($page);
                            }
                        }
                        break;

                    // Operators below are recognized but never contribute an entry.
                    case 'Tc': // set character spacing
                    case 'Td': // move text current point
                    case 'TD': // move text current point and set leading
                    case 'TL': // set leading
                    case 'Tm':
                    case 'Ts': // set super/subscripting text rise
                    case 'Tw': // set word spacing
                    case 'Tz': // set horizontal scaling
                    case 'T*': // move to start of next line
                    case 'Da':
                    case 'rg':
                    case 'RG':
                    case 're':
                    case 'co':
                    case 'cs':
                    case 'gs':
                    case 'en':
                    case 'sc':
                    case 'SC':
                    case 'g':
                    case 'G':
                    case 'V':
                    case 'vo':
                    case 'Vo':
                        break;

                    default:
                }
            }
        }

        return $text;
    }
591
592
    /**
     * Parses a section of a content stream into a list of commands.
     *
     * Each command is an array with three keys: self::TYPE (the leading
     * delimiter, 'n' for a bare number list, or '' otherwise), self::OPERATOR
     * (the operator that follows the operand, possibly '') and self::COMMAND
     * (the operand: a string, or a nested command list for [...] arrays).
     *
     * Parsing stops at the end of the text, at an "ET" keyword, at a closing
     * delimiter, or at the first token that cannot be recognized.
     *
     * @param string $text_part section to parse
     * @param int    $offset    position to start from; updated to the position where parsing stopped,
     *                          which is how nested arrays are parsed recursively
     *
     * @return array
     */
    public function getCommandsText($text_part, &$offset = 0)
    {
        $commands = $matches = [];
        $length = \strlen($text_part);

        while ($offset < $length) {
            $offset += strspn($text_part, "\x00\x09\x0a\x0c\x0d\x20", $offset);
            if ($offset >= $length) {
                // Only white space was left; reading $text_part[$offset] would be out of range.
                break;
            }
            $char = $text_part[$offset];

            $operator = '';
            $type = '';
            $command = false;

            switch ($char) {
                case '/':
                    // name object, optionally followed by a number, then an operator
                    $type = $char;
                    if (preg_match(
                        '/^\/([A-Z0-9\._,\+]+\s+[0-9.\-]+)\s+([A-Z]+)\s*/si',
                        substr($text_part, $offset),
                        $matches
                    )
                    ) {
                        $operator = $matches[2];
                        $command = $matches[1];
                        $offset += \strlen($matches[0]);
                    } elseif (preg_match(
                        '/^\/([A-Z0-9\._,\+]+)\s+([A-Z]+)\s*/si',
                        substr($text_part, $offset),
                        $matches
                    )
                    ) {
                        $operator = $matches[2];
                        $command = $matches[1];
                        $offset += \strlen($matches[0]);
                    }
                    break;

                case '[':
                case ']':
                    // array object
                    $type = $char;
                    ++$offset;
                    if ('[' === $char) {
                        // get elements; the nested call stops right after the matching "]"
                        $command = $this->getCommandsText($text_part, $offset);

                        if (preg_match('/^\s*[A-Z]{1,2}\s*/si', substr($text_part, $offset), $matches)) {
                            $operator = trim($matches[0]);
                            $offset += \strlen($matches[0]);
                        }
                    }
                    // For "]" $command stays false, which ends this (nested) parse.
                    break;

                case '<':
                case '>':
                    // hexadecimal string object
                    $type = $char;
                    ++$offset;
                    if ('<' === $char) {
                        $closing = strpos($text_part, '>', $offset);
                        if (false === $closing) {
                            // Unterminated string: take the remainder and stop at the end.
                            // Using the false position in arithmetic would reset the
                            // offset to 1 and could loop forever.
                            $command = (string) substr($text_part, $offset);
                            $offset = $length;
                        } else {
                            $command = substr($text_part, $offset, ($closing - $offset));
                            $offset = $closing + 1;
                        }
                    }

                    if (preg_match('/^\s*[A-Z]{1,2}\s*/si', substr($text_part, $offset), $matches)) {
                        $operator = trim($matches[0]);
                        $offset += \strlen($matches[0]);
                    }
                    break;

                case '(':
                case ')':
                    // literal string object
                    ++$offset;
                    $type = $char;
                    $strpos = $offset;
                    if ('(' === $char) {
                        $open_bracket = 1;
                        while ($open_bracket > 0) {
                            if (!isset($text_part[$strpos])) {
                                break;
                            }
                            $ch = $text_part[$strpos];
                            switch ($ch) {
                                case '\\':
                                    // REVERSE SOLIDUS (5Ch) (Backslash)
                                    // skip next character
                                    ++$strpos;
                                    break;

                                case '(':
                                    // LEFT PARENTHESIS (28h)
                                    ++$open_bracket;
                                    break;

                                case ')':
                                    // RIGHT PARENTHESIS (29h)
                                    --$open_bracket;
                                    break;
                            }
                            ++$strpos;
                        }
                        $command = substr($text_part, $offset, ($strpos - $offset - 1));
                        $offset = $strpos;

                        if (preg_match('/^\s*([A-Z\']{1,2})\s*/si', substr($text_part, $offset), $matches)) {
                            $operator = $matches[1];
                            $offset += \strlen($matches[0]);
                        }
                    }
                    break;

                default:
                    if ('ET' == substr($text_part, $offset, 2)) {
                        break;
                    } elseif (preg_match(
                        '/^\s*(?P<data>([0-9\.\-]+\s*?)+)\s+(?P<id>[A-Z]{1,3})\s*/si',
                        substr($text_part, $offset),
                        $matches
                    )
                    ) {
                        // numbers followed by an operator
                        $operator = trim($matches['id']);
                        $command = trim($matches['data']);
                        $offset += \strlen($matches[0]);
                    } elseif (preg_match('/^\s*([0-9\.\-]+\s*?)+\s*/si', substr($text_part, $offset), $matches)) {
                        // numbers without an operator
                        $type = 'n';
                        $command = trim($matches[0]);
                        $offset += \strlen($matches[0]);
                    } elseif (preg_match('/^\s*([A-Z\*]+)\s*/si', substr($text_part, $offset), $matches)) {
                        // operator without operands
                        $type = '';
                        $operator = $matches[1];
                        $command = '';
                        $offset += \strlen($matches[0]);
                    }
            }

            if (false === $command) {
                // Nothing recognized (or a closing delimiter was met): stop parsing.
                break;
            }

            $commands[] = [
                self::TYPE => $type,
                self::OPERATOR => $operator,
                self::COMMAND => $command,
            ];
        }

        return $commands;
    }
748
749
    /**
     * Creates the object class that matches the header's Type and Subtype.
     *
     * XObject headers yield an Image or Form depending on the Subtype; Pages,
     * Page and Encoding headers yield the class of the same name; Font headers
     * yield the Font\Font<Subtype> class when it exists and a plain Font
     * otherwise. Anything else yields a plain PDFObject.
     *
     * @param Document    $document document passed to the created object
     * @param Header      $header   header whose Type / Subtype entries drive the choice
     * @param string      $content  raw content passed to the created object
     * @param Config|null $config   configuration passed to the created object
     *
     * @return PDFObject
     */
    public static function factory(
        Document $document,
        Header $header,
        $content,
        Config $config = null
    ) {
        $type = $header->get('Type')->getContent();

        // Loose comparisons are used on purpose: they are what a switch statement performs.
        if ('XObject' == $type) {
            $subtype = $header->get('Subtype')->getContent();

            if ('Image' == $subtype) {
                return new Image($document, $header, $content, $config);
            }

            if ('Form' == $subtype) {
                return new Form($document, $header, $content, $config);
            }

            return new self($document, $header, $content, $config);
        }

        if ('Pages' == $type) {
            return new Pages($document, $header, $content, $config);
        }

        if ('Page' == $type) {
            return new Page($document, $header, $content, $config);
        }

        if ('Encoding' == $type) {
            return new Encoding($document, $header, $content, $config);
        }

        if ('Font' == $type) {
            $subtype = $header->get('Subtype')->getContent();
            $classname = '\Smalot\PdfParser\Font\Font'.$subtype;

            if (!class_exists($classname)) {
                return new Font($document, $header, $content, $config);
            }

            return new $classname($document, $header, $content, $config);
        }

        return new self($document, $header, $content, $config);
    }
795
796
    /**
     * Returns the id used to recognize this instance on the recursion stack.
     *
     * @return string value of spl_object_hash() for this instance
     */
    protected function getUniqueId()
    {
        $hash = spl_object_hash($this);

        return $hash;
    }
805
}
806