Completed
Pull Request — master (#318)
by
unknown
10:20
created

PDFObject::__construct()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 5
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 4
CRAP Score 2

Importance

Changes 0
Metric Value
eloc 3
dl 0
loc 5
rs 10
c 0
b 0
f 0
ccs 4
cts 4
cp 1
cc 2
nc 2
nop 3
crap 2
1
<?php
2
3
/**
4
 * @file
5
 *          This file is part of the PdfParser library.
6
 *
7
 * @author  Sébastien MALOT <[email protected]>
8
 * @date    2017-01-03
9
 *
10
 * @license LGPLv3
11
 * @url     <https://github.com/smalot/pdfparser>
12
 *
13
 *  PdfParser is a pdf library written in PHP, extraction oriented.
14
 *  Copyright (C) 2017 - Sébastien MALOT <[email protected]>
15
 *
16
 *  This program is free software: you can redistribute it and/or modify
17
 *  it under the terms of the GNU Lesser General Public License as published by
18
 *  the Free Software Foundation, either version 3 of the License, or
19
 *  (at your option) any later version.
20
 *
21
 *  This program is distributed in the hope that it will be useful,
22
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
23
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
24
 *  GNU Lesser General Public License for more details.
25
 *
26
 *  You should have received a copy of the GNU Lesser General Public License
27
 *  along with this program.
28
 *  If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
29
 */
30
31
namespace Smalot\PdfParser;
32
33
use Smalot\PdfParser\XObject\Form;
34
use Smalot\PdfParser\XObject\Image;
35
36
/**
 * Class PDFObject
 *
 * Generic PDF object: a header (dictionary) plus an optional raw content
 * stream. Besides exposing the header, it can tokenise the content stream
 * into text commands and turn those commands into plain text.
 */
class PDFObject
{
    /** Key of the "type" entry (leading delimiter character) of a parsed command. */
    const TYPE = 't';

    /** Key of the "operator" entry (e.g. "Tj", "Td") of a parsed command. */
    const OPERATOR = 'o';

    /** Key of the "command" entry (operand string or nested command list) of a parsed command. */
    const COMMAND = 'c';

    /**
     * The recursion stack.
     *
     * Holds the unique ids of the objects whose text is currently being
     * extracted, so that XObjects referencing each other are not expanded
     * endlessly.
     *
     * @var array
     */
    public static $recursionStack = [];

    /**
     * @var Document
     */
    protected $document = null;

    /**
     * @var Header
     */
    protected $header = null;

    /**
     * @var string
     */
    protected $content = null;

    /**
     * @param Document    $document document this object belongs to
     * @param Header|null $header   object header; an empty Header is created when omitted
     * @param string|null $content  raw content of the object
     */
    public function __construct(Document $document, Header $header = null, $content = null)
    {
        $this->document = $document;
        $this->header = null !== $header ? $header : new Header();
        $this->content = $content;
    }

    /**
     * Initialisation hook. Does nothing in this base class.
     */
    public function init()
    {
    }

    /**
     * @return Header|null
     */
    public function getHeader()
    {
        return $this->header;
    }

    /**
     * Returns the header entry stored under the given name.
     *
     * @param string $name
     *
     * @return Element|PDFObject
     */
    public function get($name)
    {
        return $this->header->get($name);
    }

    /**
     * Tells whether the header holds an entry with the given name.
     *
     * @param string $name
     *
     * @return bool
     */
    public function has($name)
    {
        return $this->header->has($name);
    }

    /**
     * Returns the header details, delegating to Header::getDetails().
     *
     * @param bool $deep forwarded as-is to the header
     *
     * @return array
     */
    public function getDetails($deep = true)
    {
        return $this->header->getDetails($deep);
    }

    /**
     * @return string|null
     */
    public function getContent()
    {
        return $this->content;
    }

    /**
     * Masks every part of a content stream that could be mistaken for an
     * operator (string payloads, inline images, dictionaries, BDC/EMC markup).
     *
     * Each masked byte is replaced by exactly one mask character, so the
     * result has the same length as the input and offsets found in it can be
     * applied to the original content.
     *
     * @param string $content raw content stream
     * @param string $char    mask character; only its first byte is used and
     *                        'X' is used when it is empty or not a string
     *
     * @return string masked content
     */
    public function cleanContent($content, $char = 'X')
    {
        // An empty mask would shrink the content and invalidate every offset.
        $char = (\is_string($char) && '' !== $char) ? $char[0] : 'X';

        // Escaped backslashes and brackets are two bytes long: mask both.
        $content = str_replace(['\\\\', '\\)', '\\('], $char.$char, $content);

        // Remove image bloc with binary content
        preg_match_all('/\s(BI\s.*?(\sID\s).*?(\sEI))\s/s', $content, $matches, PREG_OFFSET_CAPTURE);
        $content = $this->maskMatchedParts($content, $matches, 0, $char);

        // Clean content in square brackets [.....]
        preg_match_all('/\[((\(.*?\)|[0-9\.\-\s]*)*)\]/s', $content, $matches, PREG_OFFSET_CAPTURE);
        $content = $this->maskMatchedParts($content, $matches, 1, $char);

        // Clean content in round brackets (.....)
        preg_match_all('/\((.*?)\)/s', $content, $matches, PREG_OFFSET_CAPTURE);
        $content = $this->maskMatchedParts($content, $matches, 1, $char);

        // Clean structure: everything between '<' and its matching '>' is masked,
        // delimiters included.
        if ($parts = preg_split('/(<|>)/s', $content, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE)) {
            $content = '';
            $level = 0;
            foreach ($parts as $part) {
                if ('<' == $part) {
                    ++$level;
                }

                $content .= (0 == $level ? $part : str_repeat($char, \strlen($part)));

                if ('>' == $part) {
                    --$level;
                }
            }
        }

        // Clean BDC and EMC markup.
        // The delimiter is passed to preg_quote() so that a '/' mask cannot
        // terminate the pattern early.
        preg_match_all(
            '/(\/[A-Za-z0-9\_]*\s*'.preg_quote($char, '/').'*BDC)/s',
            $content,
            $matches,
            PREG_OFFSET_CAPTURE
        );
        $content = $this->maskMatchedParts($content, $matches, 1, $char);

        preg_match_all('/\s(EMC)\s/s', $content, $matches, PREG_OFFSET_CAPTURE);
        $content = $this->maskMatchedParts($content, $matches, 1, $char);

        return $content;
    }

    /**
     * Extracts the text sections (content between BT and ET) and the 'Do'
     * commands from a content stream.
     *
     * Matching is done on a masked copy of the content (see cleanContent())
     * and the resulting offsets are used to cut the original content.
     *
     * @param string $content
     *
     * @return array list of section strings
     */
    public function getSectionsText($content)
    {
        $sections = [];
        $content = ' '.$content.' ';
        $textCleaned = $this->cleanContent($content, '_');

        // Extract text blocks.
        // NOTE(review): the character class after BT also consumes '(' and '[',
        // so a section starting directly with a string loses its opening
        // bracket - confirm whether that is intended.
        if (preg_match_all('/\s+BT[\s|\(|\[]+(.*?)\s*ET/s', $textCleaned, $matches, PREG_OFFSET_CAPTURE)) {
            foreach ($matches[1] as $part) {
                $text = $part[0];
                if ('' === $text) {
                    continue;
                }
                $offset = $part[1];
                $section = substr($content, $offset, \strlen($text));

                // Removes BDC and EMC markup.
                $section = preg_replace('/(\/[A-Za-z0-9]+\s*<<.*?)(>>\s*BDC)(.*?)(EMC\s+)/s', '${3}', $section.' ');

                $sections[] = $section;
            }
        }

        // Extract 'do' commands.
        if (preg_match_all('/(\/[A-Za-z0-9\.\-_]+\s+Do)\s/s', $textCleaned, $matches, PREG_OFFSET_CAPTURE)) {
            foreach ($matches[1] as $part) {
                $text = $part[0];
                $offset = $part[1];
                $section = substr($content, $offset, \strlen($text));

                $sections[] = $section;
            }
        }

        return $sections;
    }

    /**
     * Converts the content stream of this object into plain text.
     *
     * Positioning operators are translated into line breaks, tabs or spaces,
     * string operators are decoded with the current font and 'Do' operators
     * are expanded with the text of the referenced XObject.
     *
     * @param Page $page page used to resolve fonts and XObjects; when null the
     *                   initial font is kept and 'Do' operators are ignored
     *
     * @return string extracted text followed by a single space
     *
     * @throws \Exception
     */
    public function getText(Page $page = null)
    {
        $text = '';
        $sections = $this->getSectionsText($this->content);
        $current_font = null;
        $current_font_size = 1;

        // Start with the first font found in the document, if any.
        foreach ($this->document->getObjects() as $obj) {
            if ($obj instanceof Font) {
                $current_font = $obj;
                break;
            }
        }

        if (null === $current_font) {
            $current_font = new Font($this->document);
        }

        $current_position_td = ['x' => false, 'y' => false];
        $current_position_tm = ['x' => false, 'y' => false];

        self::$recursionStack[] = $this->getUniqueId();

        // The stack entry must be removed even when decoding throws, otherwise
        // this object would stay flagged as "in progress".
        try {
            foreach ($sections as $section) {
                $commands = $this->getCommandsText($section);

                foreach ($commands as $command) {
                    // Operators without a case below (Tc, Ts, Tw, Da, rg, RG,
                    // re, co, cs, gs, en, sc, SC, g, G, V, vo, Vo, ...) do not
                    // contribute any text and are ignored.
                    switch ($command[self::OPERATOR]) {
                        // move text current point
                        case 'Td':
                            $args = $this->splitArguments($command[self::COMMAND]);
                            $y = array_pop($args);
                            $x = array_pop($args);

                            // get max char size and use as minimum for horizontal offset
                            $fontMaxWidth = $this->getFontMaxWidth($current_font, $current_font_size);

                            if (((float) $x <= 0) ||
                                (false !== $current_position_td['y'] && (float) $y < (float) $current_position_td['y'])
                            ) {
                                // vertical offset
                                $text .= "\n";
                            } elseif (false !== $current_position_td['x'] &&
                                ((float) $x - (float) $current_position_td['x']) > $fontMaxWidth
                            ) {
                                // horizontal offset
                                $text .= ' ';
                            }
                            $current_position_td = ['x' => $x, 'y' => $y];
                            break;

                        // move text current point and set leading
                        case 'TD':
                            $args = $this->splitArguments($command[self::COMMAND]);
                            $y = array_pop($args);
                            $x = array_pop($args);
                            if ((float) $y < 0) {
                                $text .= "\n";
                            } elseif ((float) $x <= 0) {
                                $text .= ' ';
                            }
                            break;

                        // select font and size
                        case 'Tf':
                            $args = $this->splitArguments($command[self::COMMAND]);
                            $id = isset($args[0]) ? $args[0] : '';
                            $size = isset($args[1]) ? $args[1] : 0;
                            $current_font_size = (float) $size;
                            $id = trim($id, '/');
                            if (null !== $page) {
                                $current_font = $page->getFont($id);
                            }
                            break;

                        case "'":
                        case 'Tj':
                            // Wrap the single string so it has the same shape as a TJ array.
                            $command[self::COMMAND] = [$command];
                            // no break
                        case 'TJ':
                            // Skip if not previously defined, should never happened.
                            if (null === $current_font) {
                                // Fallback
                                // TODO : Improve
                                $text .= $command[self::COMMAND][0][self::COMMAND];
                                break;
                            }

                            $text .= $current_font->decodeText($command[self::COMMAND]);
                            break;

                        // set leading
                        case 'TL':
                            $text .= ' ';
                            break;

                        // set text matrix
                        case 'Tm':
                            $args = $this->splitArguments($command[self::COMMAND]);
                            $y = array_pop($args);
                            $x = array_pop($args);
                            if (false !== $current_position_tm['x']) {
                                $delta = abs((float) $x - (float) $current_position_tm['x']);
                                if ($delta > 10) {
                                    $text .= "\t";
                                }
                            }
                            if (false !== $current_position_tm['y']) {
                                $delta = abs((float) $y - (float) $current_position_tm['y']);
                                if ($delta > 10) {
                                    $text .= "\n";
                                }
                            }
                            $current_position_tm = ['x' => $x, 'y' => $y];
                            break;

                        // set horizontal scaling
                        case 'Tz':
                            $text .= "\n";
                            break;

                        // move to start of next line
                        case 'T*':
                            $text .= "\n";
                            break;

                        // paint an external object
                        case 'Do':
                            if (null !== $page) {
                                $args = $this->splitArguments($command[self::COMMAND]);
                                $id = trim((string) array_pop($args), '/ ');
                                $xobject = $page->getXObject($id);

                                // Only PDFObject instances can provide text; the strict
                                // lookup avoids loose comparison of numeric-looking hashes.
                                if ($xobject instanceof self
                                    && !\in_array($xobject->getUniqueId(), self::$recursionStack, true)
                                ) {
                                    // Not a circular reference.
                                    $text .= $xobject->getText($page);
                                }
                            }
                            break;

                        default:
                    }
                }
            }
        } finally {
            array_pop(self::$recursionStack);
        }

        return $text.' ';
    }

    /**
     * Returns the decoded text fragments of this object, one array entry per
     * string operator (', Tj, TJ) or expanded XObject, without any layout
     * characters.
     *
     * @param Page $page page used to resolve fonts and XObjects; when null the
     *                   default font is kept and 'Do' operators are ignored
     *
     * @return array
     *
     * @throws \Exception
     */
    public function getTextArray(Page $page = null)
    {
        $text = [];
        $sections = $this->getSectionsText($this->content);
        $current_font = new Font($this->document);

        self::$recursionStack[] = $this->getUniqueId();

        try {
            foreach ($sections as $section) {
                $commands = $this->getCommandsText($section);

                foreach ($commands as $command) {
                    // Every operator without a case below is ignored: only
                    // font selection, strings and XObjects matter here.
                    switch ($command[self::OPERATOR]) {
                        case 'Tf':
                            $args = $this->splitArguments($command[self::COMMAND]);
                            $id = trim(isset($args[0]) ? $args[0] : '', '/');
                            // $page is optional: without it the font cannot be resolved.
                            if (null !== $page) {
                                $current_font = $page->getFont($id);
                            }
                            break;

                        case "'":
                        case 'Tj':
                            // Wrap the single string so it has the same shape as a TJ array.
                            $command[self::COMMAND] = [$command];
                            // no break
                        case 'TJ':
                            // Skip if not previously defined, should never happened.
                            if (null === $current_font) {
                                // Fallback
                                // TODO : Improve
                                $text[] = $command[self::COMMAND][0][self::COMMAND];
                                break;
                            }

                            $text[] = $current_font->decodeText($command[self::COMMAND]);
                            break;

                        case 'Do':
                            if (null !== $page) {
                                $args = $this->splitArguments($command[self::COMMAND]);
                                $id = trim((string) array_pop($args), '/ ');
                                $xobject = $page->getXObject($id);

                                // Same guard as getText(): only PDFObject instances
                                // that are not already being expanded.
                                if ($xobject instanceof self
                                    && !\in_array($xobject->getUniqueId(), self::$recursionStack, true)
                                ) {
                                    $text[] = $xobject->getText($page);
                                }
                            }
                            break;

                        default:
                    }
                }
            }
        } finally {
            array_pop(self::$recursionStack);
        }

        return $text;
    }

    /**
     * Tokenises a text section into a list of commands.
     *
     * Each command is an array with the keys self::TYPE (leading delimiter,
     * 'n' for bare numbers or ''), self::OPERATOR and self::COMMAND (operand
     * string, or a nested command list for '[' arrays). Parsing stops at the
     * first position that cannot be turned into a command.
     *
     * @param string $text_part section to parse
     * @param int    $offset    position to start from; updated to the position
     *                          where parsing stopped
     *
     * @return array
     */
    public function getCommandsText($text_part, &$offset = 0)
    {
        $commands = $matches = [];
        $length = \strlen($text_part);

        while ($offset < $length) {
            $offset += strspn($text_part, "\x00\x09\x0a\x0c\x0d\x20", $offset);
            if ($offset >= $length) {
                // Only whitespace was left: nothing more to read.
                break;
            }
            $char = $text_part[$offset];

            $operator = '';
            $type = '';
            $command = false;

            switch ($char) {
                case '/':
                    $type = $char;
                    if (preg_match(
                        '/^\/([A-Z0-9\._,\+]+\s+[0-9.\-]+)\s+([A-Z]+)\s*/si',
                        substr($text_part, $offset),
                        $matches
                    )
                    ) {
                        // name followed by a number and an operator, e.g. "/F1 12 Tf"
                        $operator = $matches[2];
                        $command = $matches[1];
                        $offset += \strlen($matches[0]);
                    } elseif (preg_match(
                        '/^\/([A-Z0-9\._,\+]+)\s+([A-Z]+)\s*/si',
                        substr($text_part, $offset),
                        $matches
                    )
                    ) {
                        // name followed by an operator, e.g. "/Im1 Do"
                        $operator = $matches[2];
                        $command = $matches[1];
                        $offset += \strlen($matches[0]);
                    }
                    break;

                case '[':
                case ']':
                    // array object
                    $type = $char;
                    ++$offset;
                    if ('[' == $char) {
                        // get elements; the nested call returns once it has consumed ']'
                        $command = $this->getCommandsText($text_part, $offset);

                        if (preg_match('/^\s*[A-Z]{1,2}\s*/si', substr($text_part, $offset), $matches)) {
                            $operator = trim($matches[0]);
                            $offset += \strlen($matches[0]);
                        }
                    }
                    // For ']' $command stays false, which ends this (nested) parse.
                    break;

                case '<':
                case '>':
                    // hexadecimal string
                    $type = $char;
                    ++$offset;
                    if ('<' == $char) {
                        $strpos = strpos($text_part, '>', $offset);
                        if (false === $strpos) {
                            // Unterminated string: consume the remainder instead of
                            // letting the offset jump back to the start of the text.
                            $strpos = $length;
                        }
                        $command = (string) substr($text_part, $offset, ($strpos - $offset));
                        $offset = min($strpos + 1, $length);
                    }

                    if (preg_match('/^\s*[A-Z]{1,2}\s*/si', substr($text_part, $offset), $matches)) {
                        $operator = trim($matches[0]);
                        $offset += \strlen($matches[0]);
                    }
                    break;

                case '(':
                case ')':
                    ++$offset;
                    $type = $char;
                    $strpos = $offset;
                    if ('(' == $char) {
                        // Find the matching closing bracket, honouring nesting and escapes.
                        $open_bracket = 1;
                        while ($open_bracket > 0) {
                            if (!isset($text_part[$strpos])) {
                                break;
                            }
                            $ch = $text_part[$strpos];
                            switch ($ch) {
                                case '\\':
                                    // REVERSE SOLIDUS (5Ch) (Backslash)
                                    // skip next character
                                    ++$strpos;
                                    break;

                                case '(':
                                    // LEFT PARENHESIS (28h)
                                    ++$open_bracket;
                                    break;

                                case ')':
                                    // RIGHT PARENTHESIS (29h)
                                    --$open_bracket;
                                    break;
                            }
                            ++$strpos;
                        }
                        $command = substr($text_part, $offset, ($strpos - $offset - 1));
                        $offset = $strpos;

                        if (preg_match('/^\s*([A-Z\']{1,2})\s*/si', substr($text_part, $offset), $matches)) {
                            $operator = $matches[1];
                            $offset += \strlen($matches[0]);
                        }
                    }
                    break;

                default:
                    if ('ET' == substr($text_part, $offset, 2)) {
                        break;
                    } elseif (preg_match(
                        '/^\s*(?P<data>([0-9\.\-]+\s*?)+)\s+(?P<id>[A-Z]{1,3})\s*/si',
                        substr($text_part, $offset),
                        $matches
                    )
                    ) {
                        // numbers followed by an operator, e.g. "10 20 Td"
                        $operator = trim($matches['id']);
                        $command = trim($matches['data']);
                        $offset += \strlen($matches[0]);
                    } elseif (preg_match('/^\s*([0-9\.\-]+\s*?)+\s*/si', substr($text_part, $offset), $matches)) {
                        // bare numbers (e.g. kerning values inside a TJ array)
                        $type = 'n';
                        $command = trim($matches[0]);
                        $offset += \strlen($matches[0]);
                    } elseif (preg_match('/^\s*([A-Z\*]+)\s*/si', substr($text_part, $offset), $matches)) {
                        // operator without operand, e.g. "T*"
                        $type = '';
                        $operator = $matches[1];
                        $command = '';
                        $offset += \strlen($matches[0]);
                    }
            }

            if (false !== $command) {
                $commands[] = [
                    self::TYPE => $type,
                    self::OPERATOR => $operator,
                    self::COMMAND => $command,
                ];
            } else {
                break;
            }
        }

        return $commands;
    }

    /**
     * Creates the object class matching the 'Type' (and 'Subtype') entries of
     * the header, falling back to a plain PDFObject.
     *
     * @param Document $document
     * @param Header   $header
     * @param string   $content
     *
     * @return PDFObject
     */
    public static function factory(Document $document, Header $header, $content)
    {
        switch ($header->get('Type')->getContent()) {
            case 'XObject':
                switch ($header->get('Subtype')->getContent()) {
                    case 'Image':
                        return new Image($document, $header, $content);

                    case 'Form':
                        return new Form($document, $header, $content);
                }

                return new self($document, $header, $content);

            case 'Pages':
                return new Pages($document, $header, $content);

            case 'Page':
                return new Page($document, $header, $content);

            case 'Encoding':
                return new Encoding($document, $header, $content);

            case 'Font':
                // Use the dedicated Font<Subtype> class when one exists.
                $subtype = $header->get('Subtype')->getContent();
                $classname = '\Smalot\PdfParser\Font\Font'.$subtype;

                if (class_exists($classname)) {
                    return new $classname($document, $header, $content);
                }

                return new Font($document, $header, $content);

            default:
                return new self($document, $header, $content);
        }
    }

    /**
     * Returns unique id identifying the object.
     *
     * @return string
     */
    protected function getUniqueId()
    {
        return spl_object_hash($this);
    }

    /**
     * Replaces every captured part of a preg_match_all() result (taken with
     * PREG_OFFSET_CAPTURE) by the same number of mask characters.
     *
     * @param string $content content the offsets refer to
     * @param mixed  $matches result array filled by preg_match_all()
     * @param int    $group   index of the capture group to mask
     * @param string $char    single mask character
     *
     * @return string
     */
    private function maskMatchedParts($content, $matches, $group, $char)
    {
        if (!isset($matches[$group]) || !\is_array($matches[$group])) {
            // The pattern failed to run: nothing to mask.
            return $content;
        }

        foreach ($matches[$group] as $part) {
            $size = \strlen($part[0]);
            $content = substr_replace($content, str_repeat($char, $size), $part[1], $size);
        }

        return $content;
    }

    /**
     * Splits the operands of a command on single whitespace characters.
     *
     * @param string $operands
     *
     * @return array operands, or an empty array when the split fails
     */
    private function splitArguments($operands)
    {
        $args = preg_split('/\s/s', $operands);

        return false === $args ? [] : $args;
    }

    /**
     * Computes the widest glyph of a font scaled to the given font size. It is
     * used as the minimum horizontal move that counts as a word gap.
     *
     * @param Font|null $font     font to inspect; null yields 0
     * @param float     $fontSize current font size
     *
     * @return float|int
     */
    private function getFontMaxWidth($font, $fontSize)
    {
        $fontMaxWidth = 0;
        if (null === $font) {
            return $fontMaxWidth;
        }

        $fontDictionary = $font->getDetails();

        // type 0
        if (isset($fontDictionary['DescendantFonts'][0])) {
            $fontDictionary = $fontDictionary['DescendantFonts'][0];
        }

        // type 1
        if (isset($fontDictionary['Widths'])) {
            foreach ($fontDictionary['Widths'] as $width) {
                if ((float) $width > $fontMaxWidth) {
                    $fontMaxWidth = (float) $width;
                }
            }

            $fontMaxWidth = ($fontMaxWidth / 1000) * $fontSize;
        }

        // CIDFontType2 ('Type' is not guaranteed to be present in the details)
        if (isset($fontDictionary['Type'])
            && \is_string($fontDictionary['Type'])
            && 'cidfonttype2' === strtolower($fontDictionary['Type'])
        ) {
            if (isset($fontDictionary['DW']) && $fontDictionary['DW']) {
                $fontMaxWidth = ((float) $fontDictionary['DW'] / 1000) * $fontSize;
            } else {
                // default
                $fontMaxWidth = $fontSize;
            }
        }

        return $fontMaxWidth;
    }
}
807