Passed
Push — master ( 320582...b8784e )
by Konrad
04:42 queued 01:53
created

PDFObject::getTextArray()   D

Complexity

Conditions 35
Paths 85

Size

Total Lines 118
Code Lines 73

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 55
CRAP Score 55.7453

Importance

Changes 0
Metric Value
cc 35
eloc 73
c 0
b 0
f 0
nc 85
nop 1
dl 0
loc 118
ccs 55
cts 74
cp 0.7432
crap 55.7453
rs 4.1666

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
/**
4
 * @file
5
 *          This file is part of the PdfParser library.
6
 *
7
 * @author  Sébastien MALOT <[email protected]>
8
 * @date    2017-01-03
9
 *
10
 * @license LGPLv3
11
 * @url     <https://github.com/smalot/pdfparser>
12
 *
13
 *  PdfParser is a pdf library written in PHP, extraction oriented.
14
 *  Copyright (C) 2017 - Sébastien MALOT <[email protected]>
15
 *
16
 *  This program is free software: you can redistribute it and/or modify
17
 *  it under the terms of the GNU Lesser General Public License as published by
18
 *  the Free Software Foundation, either version 3 of the License, or
19
 *  (at your option) any later version.
20
 *
21
 *  This program is distributed in the hope that it will be useful,
22
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
23
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
24
 *  GNU Lesser General Public License for more details.
25
 *
26
 *  You should have received a copy of the GNU Lesser General Public License
27
 *  along with this program.
28
 *  If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
29
 */
30
31
namespace Smalot\PdfParser;
32
33
use Smalot\PdfParser\XObject\Form;
34
use Smalot\PdfParser\XObject\Image;
35
36
/**
37
 * Class PDFObject
38
 */
39
class PDFObject
40
{
41
    const TYPE = 't';
42
43
    const OPERATOR = 'o';
44
45
    const COMMAND = 'c';
46
47
    /**
48
     * The recursion stack.
49
     *
50
     * @var array
51
     */
52
    public static $recursionStack = [];
53
54
    /**
55
     * @var Document
56
     */
57
    protected $document = null;
58
59
    /**
60
     * @var Header
61
     */
62
    protected $header = null;
63
64
    /**
65
     * @var string
66
     */
67
    protected $content = null;
68
69
    /**
70
     * @var Config
71
     */
72
    protected $config;
73
74
    /**
     * Stores the owning document, the header, the raw content and the
     * configuration on the new object.
     *
     * When no header is passed, an empty Header instance is created, so the
     * header property is never null right after construction.
     *
     * @param Document    $document document stored on this object
     * @param Header|null $header   header of the object; a new Header is used when null
     * @param string|null $content  raw content of the object
     * @param Config|null $config   configuration, stored as given (may be null)
     */
    public function __construct(
        Document $document,
        Header $header = null,
        $content = null,
        Config $config = null
    ) {
        if (null === $header) {
            $header = new Header();
        }

        $this->document = $document;
        $this->header = $header;
        $this->content = $content;
        $this->config = $config;
    }
90
91 37
    /**
     * Initialisation hook; this implementation intentionally does nothing.
     *
     * NOTE(review): presumably meant to be overridden by subclasses - confirm
     * against the callers.
     */
    public function init()
    {
        // Nothing to initialise at this level.
    }
94
95
    /**
     * Gives access to the header stored on this object.
     *
     * @return Header|null the header property as currently set
     */
    public function getHeader()
    {
        $header = $this->header;

        return $header;
    }
102
103
    /**
     * Looks up a header entry by name; the call is forwarded to the header's
     * own get() method.
     *
     * @param string $name name of the header entry
     *
     * @return Element|PDFObject whatever the header returns for that name
     */
    public function get($name)
    {
        $header = $this->header;

        return $header->get($name);
    }
112
113
    /**
     * Tells whether the header knows an entry with the given name; the call is
     * forwarded to the header's own has() method.
     *
     * @param string $name name of the header entry
     *
     * @return bool
     */
    public function has($name)
    {
        $header = $this->header;

        return $header->has($name);
    }
122
123
    /**
     * Returns the details reported by the header; the call is forwarded to the
     * header's own getDetails() method.
     *
     * @param bool $deep passed through unchanged to the header
     *
     * @return array
     */
    public function getDetails($deep = true)
    {
        $header = $this->header;

        return $header->getDetails($deep);
    }
132
133
    /**
     * Gives access to the raw content stored on this object.
     *
     * @return string|null the content property as currently set
     */
    public function getContent()
    {
        $content = $this->content;

        return $content;
    }
140
141
    /**
     * Masks the payload parts of a content stream so that only its structure
     * stays readable.
     *
     * Escaped backslashes and parentheses, inline image blocks (BI ... ID ... EI),
     * the inside of square and round brackets, everything nested in < ... >,
     * BDC tags and EMC markers are overwritten with the masking character.
     * Every replacement has the same byte length as the text it replaces, so
     * offsets found in the returned string are valid in the original string.
     *
     * @param string $content content stream to mask
     * @param string $char    masking character; only its first byte is used
     *
     * @return string the masked content
     */
    public function cleanContent($content, $char = 'X')
    {
        $char = $char[0];

        // Neutralise escaped backslashes and parentheses first, so that they
        // cannot be mistaken for string delimiters by the patterns below.
        $content = str_replace(['\\\\', '\\)', '\\('], $char.$char, $content);

        // Remove image bloc with binary content
        $content = $this->maskMatches($content, '/\s(BI\s.*?(\sID\s).*?(\sEI))\s/s', 0, $char);

        // Clean content in square brackets [.....]
        $content = $this->maskMatches($content, '/\[((\(.*?\)|[0-9\.\-\s]*)*)\]/s', 1, $char);

        // Clean content in round brackets (.....)
        $content = $this->maskMatches($content, '/\((.*?)\)/s', 1, $char);

        // Clean structure: everything between '<' and the matching '>' is
        // masked, delimiters included.
        $parts = preg_split('/(<|>)/s', $content, -1, \PREG_SPLIT_NO_EMPTY | \PREG_SPLIT_DELIM_CAPTURE);
        if ($parts) {
            $content = '';
            $level = 0;
            foreach ($parts as $part) {
                if ('<' === $part) {
                    ++$level;
                }

                $content .= (0 === $level ? $part : str_repeat($char, \strlen($part)));

                if ('>' === $part) {
                    --$level;
                }
            }
        }

        // Clean BDC and EMC markup. The masking character is quoted together
        // with the pattern delimiter, otherwise a '/' mask would end the
        // pattern early.
        $content = $this->maskMatches(
            $content,
            '/(\/[A-Za-z0-9\_]*\s*'.preg_quote($char, '/').'*BDC)/s',
            1,
            $char
        );

        return $this->maskMatches($content, '/\s(EMC)\s/s', 1, $char);
    }

    /**
     * Overwrites one capture group of every match of a pattern with the
     * masking character, keeping the length of the string unchanged.
     *
     * @param string $content subject to search and mask
     * @param string $pattern PCRE pattern
     * @param int    $group   index of the capture group to overwrite (0 = whole match)
     * @param string $char    single masking character
     *
     * @return string the subject with the selected group of each match masked
     */
    private function maskMatches($content, $pattern, $group, $char)
    {
        // preg_match_all() yields 0 or false when there is nothing to mask.
        if (!preg_match_all($pattern, $content, $matches, \PREG_OFFSET_CAPTURE)) {
            return $content;
        }

        foreach ($matches[$group] as $part) {
            $length = \strlen($part[0]);
            // Same-length replacement: the offsets of later matches stay valid.
            $content = substr_replace($content, str_repeat($char, $length), $part[1], $length);
        }

        return $content;
    }
202
203
    /**
     * Cuts a content stream into the sections that matter for text
     * extraction: the inside of every BT ... ET block and every
     * "/Name Do" command.
     *
     * Positions are searched in a masked copy of the content (see
     * cleanContent()) and then used to slice the unmasked content. All text
     * blocks come first in the result, followed by all Do commands.
     *
     * @param string $content content stream
     *
     * @return array list of section strings
     */
    public function getSectionsText($content)
    {
        // Pad both ends: the patterns below expect surrounding whitespace.
        $content = ' '.$content.' ';
        $masked = $this->cleanContent($content, '_');
        $sections = [];

        // Extract text blocks.
        $blockCount = preg_match_all(
            '/(\sQ)?\s+BT[\s|\(|\[]+(.*?)\s*ET(\sq)?/s',
            $masked,
            $blocks,
            \PREG_OFFSET_CAPTURE
        );
        if ($blockCount) {
            foreach ($blocks[2] as $index => $block) {
                $length = \strlen($block[0]);
                if (0 === $length) {
                    continue;
                }

                $raw = substr($content, $block[1], $length);

                // Removes BDC and EMC markup.
                $raw = preg_replace('/(\/[A-Za-z0-9]+\s*<<.*?)(>>\s*BDC)(.*?)(EMC\s+)/s', '${3}', $raw.' ');

                // Add Q and q flags if detected around BT/ET.
                // @see: https://github.com/smalot/pdfparser/issues/387
                $prefix = empty($blocks[1][$index][0]) ? '' : "Q\n";
                $suffix = empty($blocks[3][$index][0]) ? '' : "\nq";

                $sections[] = trim($prefix.$raw).$suffix;
            }
        }

        // Extract 'do' commands.
        $doCount = preg_match_all('/(\/[A-Za-z0-9\.\-_]+\s+Do)\s/s', $masked, $calls, \PREG_OFFSET_CAPTURE);
        if ($doCount) {
            foreach ($calls[1] as $call) {
                $sections[] = substr($content, $call[1], \strlen($call[0]));
            }
        }

        return $sections;
    }
248
249 14
    /**
     * Picks the font to start text decoding with.
     *
     * The fonts of the given page (if any) are merged with the fonts of the
     * document and the first entry of the merged list is returned. When the
     * list is empty, a new Font bound to the document is created instead.
     *
     * @param Page|null $page page whose fonts take precedence, if given
     *
     * @return Font
     */
    private function getDefaultFont(Page $page = null)
    {
        $candidates = null !== $page ? $page->getFonts() : [];
        $candidates = array_merge($candidates, array_values($this->document->getFonts()));

        if (0 === \count($candidates)) {
            return new Font($this->document);
        }

        return reset($candidates);
    }
264
265
    /**
     * Extracts the text of this object's content as one string.
     *
     * The content is cut into sections (getSectionsText()), every section is
     * tokenised (getCommandsText()) and the resulting commands are interpreted
     * in order:
     *  - Tf switches the current font (only when a page is given and the page
     *    knows the font id), q / Q save and restore it;
     *  - Tj, ' and TJ decode text with the current font;
     *  - Td, TD, Tm, TL, Tz and T* insert line breaks, tabs or spaces;
     *  - Do appends the text of the referenced XObject (only when a page is
     *    given), guarded against circular references through the static
     *    recursion stack;
     *  - a "ReversedChars" BMC marker makes the text of the section be
     *    reversed character by character.
     * Any other operator is ignored.
     *
     * @param Page|null $page page used to resolve font and XObject ids
     *
     * @return string the extracted text followed by one trailing space
     *
     * @throws \Exception
     */
    public function getText(Page $page = null)
    {
        $result = '';
        $sections = $this->getSectionsText($this->content);
        $current_font = $this->getDefaultFont($page);
        $clipped_font = $current_font;

        $current_position_td = ['x' => false, 'y' => false];
        $current_position_tm = ['x' => false, 'y' => false];

        self::$recursionStack[] = $this->getUniqueId();

        // The stack is static: it must be unwound even when decoding throws,
        // otherwise later calls would see a stale entry.
        try {
            foreach ($sections as $section) {
                $commands = $this->getCommandsText($section);
                $reverse_text = false;
                $text = '';

                foreach ($commands as $command) {
                    switch ($command[self::OPERATOR]) {
                        case 'BMC':
                            if ('ReversedChars' == $command[self::COMMAND]) {
                                $reverse_text = true;
                            }
                            break;

                        // move text current point
                        case 'Td':
                            $args = preg_split('/\s/s', $command[self::COMMAND]);
                            $y = array_pop($args);
                            $x = array_pop($args);
                            if (((float) $x <= 0) ||
                                (false !== $current_position_td['y'] && (float) $y < (float) ($current_position_td['y']))
                            ) {
                                // vertical offset
                                $text .= "\n";
                            } elseif (false !== $current_position_td['x']
                                && (float) $x > (float) ($current_position_td['x'])
                            ) {
                                // horizontal offset
                                $text .= ' ';
                            }
                            $current_position_td = ['x' => $x, 'y' => $y];
                            break;

                        // move text current point and set leading
                        case 'TD':
                            $args = preg_split('/\s/s', $command[self::COMMAND]);
                            $y = array_pop($args);
                            $x = array_pop($args);
                            if ((float) $y < 0) {
                                $text .= "\n";
                            } elseif ((float) $x <= 0) {
                                $text .= ' ';
                            }
                            break;

                        case 'Tf':
                            if (null !== $page) {
                                list($id) = preg_split('/\s/s', $command[self::COMMAND]);
                                $new_font = $page->getFont(trim($id, '/'));
                                // If an invalid font ID is given, do not update the font.
                                // This should theoretically never happen, as the PDF spec states for the Tf operator:
                                // "The specified font value shall match a resource name in the Font entry of the default resource dictionary"
                                // (https://www.adobe.com/content/dam/acom/en/devnet/pdf/pdfs/PDF32000_2008.pdf, page 435)
                                // But we want to make sure that malformed PDFs do not simply crash.
                                if (null !== $new_font) {
                                    $current_font = $new_font;
                                }
                            }
                            break;

                        case 'Q':
                            // Use clip: restore font.
                            $current_font = $clipped_font;
                            break;

                        case 'q':
                            // Use clip: save font.
                            $clipped_font = $current_font;
                            break;

                        case "'":
                        case 'Tj':
                            // Wrap the single string so it has the same shape as a TJ array.
                            $command[self::COMMAND] = [$command];
                            // no break
                        case 'TJ':
                            $text .= $current_font->decodeText($command[self::COMMAND]);
                            break;

                        // set leading
                        case 'TL':
                            $text .= ' ';
                            break;

                        case 'Tm':
                            $args = preg_split('/\s/s', $command[self::COMMAND]);
                            $y = array_pop($args);
                            $x = array_pop($args);
                            if (false !== $current_position_tm['x']) {
                                $delta = abs((float) $x - (float) ($current_position_tm['x']));
                                if ($delta > 10) {
                                    $text .= "\t";
                                }
                            }
                            if (false !== $current_position_tm['y']) {
                                $delta = abs((float) $y - (float) ($current_position_tm['y']));
                                if ($delta > 10) {
                                    $text .= "\n";
                                }
                            }
                            $current_position_tm = ['x' => $x, 'y' => $y];
                            break;

                        // set horizontal scaling
                        case 'Tz':
                        // move to start of next line
                        case 'T*':
                            $text .= "\n";
                            break;

                        case 'Do':
                            if (null !== $page) {
                                $args = preg_split('/\s/s', $command[self::COMMAND]);
                                $id = trim(array_pop($args), '/ ');
                                $xobject = $page->getXObject($id);

                                // @todo $xobject could be a ElementXRef object, which would then throw an error
                                if ($xobject instanceof self
                                    && !\in_array($xobject->getUniqueId(), self::$recursionStack, true)
                                ) {
                                    // Not a circular reference.
                                    $text .= $xobject->getText($page);
                                }
                            }
                            break;

                        // Tc, Ts, Tw, Da, rg, RG, re, co, cs, gs, en, sc, SC,
                        // g, G, V, vo, Vo and unknown operators add no text.
                        default:
                    }
                }

                // Fix Hebrew and other reverse text oriented languages.
                // @see: https://github.com/smalot/pdfparser/issues/398
                if ($reverse_text) {
                    $chars = mb_str_split($text, 1, mb_internal_encoding());
                    $text = implode('', array_reverse($chars));
                }

                $result .= $text;
            }
        } finally {
            array_pop(self::$recursionStack);
        }

        return $result.' ';
    }
479
480
    /**
     * Extracts the text of this object's content as a list of decoded chunks.
     *
     * Works like getText() but without any layout handling: every Tj, ' or TJ
     * command adds one decoded string to the result and every Do command adds
     * the text of the referenced XObject. Tf switches the current font. Tf and
     * Do are only honoured when a page is given. Any other operator is ignored.
     *
     * @param Page|null $page page used to resolve font and XObject ids
     *
     * @return array list of decoded text chunks, in command order
     *
     * @throws \Exception
     */
    public function getTextArray(Page $page = null)
    {
        $text = [];
        $sections = $this->getSectionsText($this->content);
        $current_font = new Font($this->document);

        foreach ($sections as $section) {
            foreach ($this->getCommandsText($section) as $command) {
                switch ($command[self::OPERATOR]) {
                    case 'Tf':
                        if (null !== $page) {
                            list($id) = preg_split('/\s/s', $command[self::COMMAND]);
                            $new_font = $page->getFont(trim($id, '/'));
                            // Keep the previous font when the id is unknown, as
                            // getText() does, so a malformed PDF cannot make the
                            // next decodeText() call fail on null.
                            if (null !== $new_font) {
                                $current_font = $new_font;
                            }
                        }
                        break;

                    case "'":
                    case 'Tj':
                        // Wrap the single string so it has the same shape as a TJ array.
                        $command[self::COMMAND] = [$command];
                        // no break
                    case 'TJ':
                        $text[] = $current_font->decodeText($command[self::COMMAND]);
                        break;

                    case 'Do':
                        if (null !== $page) {
                            $args = preg_split('/\s/s', $command[self::COMMAND]);
                            $id = trim(array_pop($args), '/ ');
                            $xobject = $page->getXObject($id);

                            // Same guard as in getText(): only objects of this
                            // class offer getText(), and objects that are
                            // already being processed are skipped.
                            if ($xobject instanceof self
                                && !\in_array($xobject->getUniqueId(), self::$recursionStack, true)
                            ) {
                                $text[] = $xobject->getText($page);
                            }
                        }
                        break;

                    // Tc, Td, TD, TL, Tm, Ts, Tw, Tz, T*, Da, rg, RG, re, co,
                    // cs, gs, en, sc, SC, g, G, V, vo, Vo and unknown operators
                    // add no text.
                    default:
                }
            }
        }

        return $text;
    }
606
607
    /**
     * Tokenises a piece of content stream into a list of commands.
     *
     * Each command is an array with three entries:
     *  - self::TYPE:     '/', '[', '<', '(' for name, array, angle-bracket and
     *                    string operands, 'n' for a bare number list, '' otherwise;
     *  - self::OPERATOR: the operator that follows the operand ('' if none);
     *  - self::COMMAND:  the operand text, or a nested command list for '['.
     *
     * Parsing starts at $offset and stops at the end of the text, at a closing
     * delimiter (']', '>' or ')'), at "ET", or at the first token that cannot
     * be recognised. $offset is updated along the way, which is how the
     * recursive call for arrays reports where the array ended.
     *
     * @param string $text_part text to tokenise
     * @param int    $offset    start position; updated to the position reached
     *
     * @return array list of commands
     */
    public function getCommandsText($text_part, &$offset = 0)
    {
        $commands = $matches = [];
        $length = \strlen($text_part);

        while ($offset < $length) {
            $offset += strspn($text_part, "\x00\x09\x0a\x0c\x0d\x20", $offset);
            if ($offset >= $length) {
                // Only whitespace was left: nothing more to read.
                break;
            }
            $char = $text_part[$offset];

            $operator = '';
            $type = '';
            $command = false;

            switch ($char) {
                case '/':
                    $type = $char;
                    $rest = substr($text_part, $offset);
                    // Name followed by a number and an operator, or by an operator only.
                    if (preg_match('/^\/([A-Z0-9\._,\+]+\s+[0-9.\-]+)\s+([A-Z]+)\s*/si', $rest, $matches)
                        || preg_match('/^\/([A-Z0-9\._,\+]+)\s+([A-Z]+)\s*/si', $rest, $matches)
                    ) {
                        $operator = $matches[2];
                        $command = $matches[1];
                        $offset += \strlen($matches[0]);
                    }
                    break;

                case '[':
                    // array object
                    $type = $char;
                    ++$offset;
                    // get elements
                    $command = $this->getCommandsText($text_part, $offset);

                    if (preg_match('/^\s*[A-Z]{1,2}\s*/si', substr($text_part, $offset), $matches)) {
                        $operator = trim($matches[0]);
                        $offset += \strlen($matches[0]);
                    }
                    break;

                case ']':
                    // End of array: step over it and stop ($command stays false).
                    $type = $char;
                    ++$offset;
                    break;

                case '<':
                case '>':
                    $type = $char;
                    ++$offset;
                    if ('<' === $char) {
                        $strpos = strpos($text_part, '>', $offset);
                        if (false === $strpos) {
                            // Unterminated '<': take the remainder and move to
                            // the end. Using the false position in arithmetic
                            // would reset $offset to 1 and could loop forever.
                            $command = (string) substr($text_part, $offset);
                            $offset = $length;
                        } else {
                            $command = substr($text_part, $offset, $strpos - $offset);
                            $offset = $strpos + 1;
                        }
                    }

                    if (preg_match('/^\s*[A-Z]{1,2}\s*/si', substr($text_part, $offset), $matches)) {
                        $operator = trim($matches[0]);
                        $offset += \strlen($matches[0]);
                    }
                    break;

                case '(':
                case ')':
                    ++$offset;
                    $type = $char;
                    if ('(' === $char) {
                        $strpos = $offset;
                        $open_bracket = 1;
                        // Walk to the matching ')' (or to the end of the text).
                        while ($open_bracket > 0 && isset($text_part[$strpos])) {
                            switch ($text_part[$strpos]) {
                                case '\\':
                                    // REVERSE SOLIDUS (5Ch) (Backslash)
                                    // skip next character
                                    ++$strpos;
                                    break;

                                case '(':
                                    // LEFT PARENTHESIS (28h)
                                    ++$open_bracket;
                                    break;

                                case ')':
                                    // RIGHT PARENTHESIS (29h)
                                    --$open_bracket;
                                    break;
                            }
                            ++$strpos;
                        }
                        $command = substr($text_part, $offset, ($strpos - $offset - 1));
                        $offset = $strpos;

                        if (preg_match('/^\s*([A-Z\']{1,2})\s*/si', substr($text_part, $offset), $matches)) {
                            $operator = $matches[1];
                            $offset += \strlen($matches[0]);
                        }
                    }
                    break;

                default:
                    $rest = substr($text_part, $offset);
                    if ('ET' == substr($text_part, $offset, 2)) {
                        // End of text block: stop ($command stays false).
                        break;
                    }
                    if (preg_match(
                        '/^\s*(?P<data>([0-9\.\-]+\s*?)+)\s+(?P<id>[A-Z]{1,3})\s*/si',
                        $rest,
                        $matches
                    )) {
                        // Numbers followed by an operator.
                        $operator = trim($matches['id']);
                        $command = trim($matches['data']);
                        $offset += \strlen($matches[0]);
                    } elseif (preg_match('/^\s*([0-9\.\-]+\s*?)+\s*/si', $rest, $matches)) {
                        // Numbers without an operator.
                        $type = 'n';
                        $command = trim($matches[0]);
                        $offset += \strlen($matches[0]);
                    } elseif (preg_match('/^\s*([A-Z\*]+)\s*/si', $rest, $matches)) {
                        // Operator without operands.
                        $type = '';
                        $operator = $matches[1];
                        $command = '';
                        $offset += \strlen($matches[0]);
                    }
            }

            if (false === $command) {
                break;
            }

            $commands[] = [
                self::TYPE => $type,
                self::OPERATOR => $operator,
                self::COMMAND => $command,
            ];
        }

        return $commands;
    }
763
764
    /**
     * Creates the object class that matches the header's "Type" entry (and,
     * for XObject and Font, its "Subtype" entry).
     *
     *  - XObject: Image or Form depending on the subtype, otherwise PDFObject;
     *  - Pages, Page, Encoding: the class of the same name;
     *  - Font: the class \Smalot\PdfParser\Font\Font<Subtype> when it exists,
     *    otherwise Font;
     *  - anything else: PDFObject.
     *
     * @param Document    $document document passed to the created object
     * @param Header      $header   header that decides which class is created
     * @param string      $content  content passed to the created object
     * @param Config|null $config   configuration passed to the created object
     *
     * @return PDFObject
     */
    public static function factory(
        Document $document,
        Header $header,
        $content,
        Config $config = null
    ) {
        $type = $header->get('Type')->getContent();

        if ('XObject' == $type) {
            $subtype = $header->get('Subtype')->getContent();

            if ('Image' == $subtype) {
                return new Image($document, $header, $content, $config);
            }

            if ('Form' == $subtype) {
                return new Form($document, $header, $content, $config);
            }

            return new self($document, $header, $content, $config);
        }

        if ('Pages' == $type) {
            return new Pages($document, $header, $content, $config);
        }

        if ('Page' == $type) {
            return new Page($document, $header, $content, $config);
        }

        if ('Encoding' == $type) {
            return new Encoding($document, $header, $content, $config);
        }

        if ('Font' == $type) {
            $subtype = $header->get('Subtype')->getContent();
            $classname = '\Smalot\PdfParser\Font\Font'.$subtype;

            // Fall back to the generic Font when no specialised class exists.
            if (!class_exists($classname)) {
                return new Font($document, $header, $content, $config);
            }

            return new $classname($document, $header, $content, $config);
        }

        return new self($document, $header, $content, $config);
    }
810
811
    /**
     * Returns an id that identifies this object instance, as computed by
     * spl_object_hash().
     *
     * @return string
     */
    protected function getUniqueId()
    {
        $hash = spl_object_hash($this);

        return $hash;
    }
820
}
821