Completed
Pull Request — master (#318)
by
unknown
10:20
created

PDFObject::getCommandsText()   F

Complexity

Conditions 27
Paths 65

Size

Total Lines 150
Code Lines 105

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 95
CRAP Score 27.0911

Importance

Changes 0
Metric Value
cc 27
eloc 105
c 0
b 0
f 0
nc 65
nop 2
dl 0
loc 150
ccs 95
cts 100
cp 0.95
crap 27.0911
rs 3.3333

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
/**
4
 * @file
5
 *          This file is part of the PdfParser library.
6
 *
7
 * @author  Sébastien MALOT <[email protected]>
8
 * @date    2017-01-03
9
 *
10
 * @license LGPLv3
11
 * @url     <https://github.com/smalot/pdfparser>
12
 *
13
 *  PdfParser is a pdf library written in PHP, extraction oriented.
14
 *  Copyright (C) 2017 - Sébastien MALOT <[email protected]>
15
 *
16
 *  This program is free software: you can redistribute it and/or modify
17
 *  it under the terms of the GNU Lesser General Public License as published by
18
 *  the Free Software Foundation, either version 3 of the License, or
19
 *  (at your option) any later version.
20
 *
21
 *  This program is distributed in the hope that it will be useful,
22
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
23
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
24
 *  GNU Lesser General Public License for more details.
25
 *
26
 *  You should have received a copy of the GNU Lesser General Public License
27
 *  along with this program.
28
 *  If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
29
 */
30
31
namespace Smalot\PdfParser;
32
33
use Smalot\PdfParser\XObject\Form;
34
use Smalot\PdfParser\XObject\Image;
35
36
/**
37
 * Class PDFObject
38
 */
39
class PDFObject
40
{
41
    const TYPE = 't';
42
43
    const OPERATOR = 'o';
44
45
    const COMMAND = 'c';
46
47
    /**
48
     * The recursion stack.
49
     *
50
     * @var array
51
     */
52
    public static $recursionStack = [];
53
54
    /**
55
     * @var Document
56
     */
57
    protected $document = null;
58
59
    /**
60
     * @var Header
61
     */
62
    protected $header = null;
63
64
    /**
65
     * @var string
66
     */
67
    protected $content = null;
68
69
    /**
     * Creates an object bound to the document it was parsed from.
     *
     * @param Document    $document document that owns this object
     * @param Header|null $header   dictionary of the object; when null a Header
     *                              built without arguments is used instead
     * @param string|null $content  raw content of the object, stored untouched
     */
    public function __construct(Document $document, Header $header = null, $content = null)
    {
        if (null === $header) {
            $header = new Header();
        }

        $this->document = $document;
        $this->header = $header;
        $this->content = $content;
    }
79
80 29
    /**
     * Initialisation hook; this base implementation does nothing.
     */
    public function init()
    {
        // Intentionally empty.
    }
83
84
    /**
     * Gives access to the header (dictionary) stored on this object.
     *
     * @return Header|null
     */
    public function getHeader()
    {
        $header = $this->header;

        return $header;
    }
91
92
    /**
     * Looks up a header entry by name; the lookup is delegated to the header.
     *
     * @param string $name entry name
     *
     * @return Element|PDFObject
     */
    public function get($name)
    {
        $entry = $this->header->get($name);

        return $entry;
    }
101
102
    /**
     * Tells whether the header holds an entry with the given name; the check
     * is delegated to the header.
     *
     * @param string $name entry name
     *
     * @return bool
     */
    public function has($name)
    {
        $exists = $this->header->has($name);

        return $exists;
    }
111
112
    /**
     * Returns the header details; the work is delegated to the header.
     *
     * @param bool $deep forwarded unchanged to the header
     *
     * @return array
     */
    public function getDetails($deep = true)
    {
        $details = $this->header->getDetails($deep);

        return $details;
    }
121
122
    /**
     * Returns the content given to the constructor, unmodified.
     *
     * @return string|null
     */
    public function getContent()
    {
        $content = $this->content;

        return $content;
    }
129
130
    /**
     * Masks every part of a content stream that is data rather than operators.
     *
     * Escaped backslashes/parentheses, inline images (BI ... ID ... EI), the
     * inside of [...] arrays and (...) strings, everything between '<' and '>',
     * and the BDC / EMC markers are overwritten with the masking character.
     * Every replacement has the same length as the text it replaces, so a byte
     * offset found in the returned string is valid in $content as well.
     *
     * @param string $content content stream to mask
     * @param string $char    masking character; only its first byte is used and
     *                        'X' is used when the string is empty
     *
     * @return string masked copy of $content, same length as the input
     */
    public function cleanContent($content, $char = 'X')
    {
        // An empty mask would produce zero-length replacements and shift every offset.
        $char = isset($char[0]) ? $char[0] : 'X';

        // Neutralise escaped backslashes and parentheses first, so that they
        // cannot open or close a string in the patterns below.
        $content = str_replace(['\\\\', '\\)', '\\('], $char.$char, $content);

        // Remove image bloc with binary content
        $content = $this->maskPattern($content, '/\s(BI\s.*?(\sID\s).*?(\sEI))\s/s', 0, $char);

        // Clean content in square brackets [.....]
        $content = $this->maskPattern($content, '/\[((\(.*?\)|[0-9\.\-\s]*)*)\]/s', 1, $char);

        // Clean content in round brackets (.....)
        $content = $this->maskPattern($content, '/\((.*?)\)/s', 1, $char);

        // Clean structure: everything from a '<' up to and including the
        // matching '>' is masked; text at nesting level 0 is kept.
        if ($parts = preg_split('/(<|>)/s', $content, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE)) {
            $content = '';
            $level = 0;
            foreach ($parts as $part) {
                if ('<' == $part) {
                    ++$level;
                }

                $content .= (0 == $level ? $part : str_repeat($char, \strlen($part)));

                if ('>' == $part) {
                    --$level;
                }
            }
        }

        // Clean BDC and EMC markup. The mask character is quoted with the
        // pattern delimiter so that a '/' mask cannot terminate the pattern.
        $content = $this->maskPattern(
            $content,
            '/(\/[A-Za-z0-9\_]*\s*'.preg_quote($char, '/').'*BDC)/s',
            1,
            $char
        );
        $content = $this->maskPattern($content, '/\s(EMC)\s/s', 1, $char);

        return $content;
    }

    /**
     * Overwrites one capture group of every match of $pattern with $char.
     *
     * @param string $content text to search and mask
     * @param string $pattern PCRE pattern
     * @param int    $group   index of the capture group to overwrite
     * @param string $char    single masking character
     *
     * @return string $content with the captured spans masked; unchanged when
     *                the pattern does not match or matching fails
     */
    private function maskPattern($content, $pattern, $group, $char)
    {
        if (!preg_match_all($pattern, $content, $matches, PREG_OFFSET_CAPTURE)) {
            return $content;
        }

        foreach ($matches[$group] as $capture) {
            $length = \strlen($capture[0]);
            $content = substr_replace($content, str_repeat($char, $length), $capture[1], $length);
        }

        return $content;
    }
191
192
    /**
     * Splits a content stream into the sections that matter for text output:
     * the body of each BT ... ET text block, followed by each "/Name Do" call.
     *
     * Matching is done on a masked copy of the stream (see cleanContent()),
     * and the sections are then cut out of the unmasked text at the same
     * offsets.
     *
     * @param string $content content stream
     *
     * @return array list of section strings; text blocks first, then Do commands
     */
    public function getSectionsText($content)
    {
        // Padding lets the whitespace-anchored patterns match at both ends.
        $padded = ' '.$content.' ';
        $masked = $this->cleanContent($padded, '_');
        $result = [];

        // Extract text blocks.
        $hasTextBlocks = preg_match_all(
            '/\s+BT[\s|\(|\[]+(.*?)\s*ET/s',
            $masked,
            $blocks,
            PREG_OFFSET_CAPTURE
        );
        if ($hasTextBlocks) {
            foreach ($blocks[1] as $block) {
                $maskedBody = $block[0];
                if ('' === $maskedBody) {
                    continue;
                }

                $body = substr($padded, $block[1], \strlen($maskedBody));

                // Removes BDC and EMC markup.
                $result[] = preg_replace(
                    '/(\/[A-Za-z0-9]+\s*<<.*?)(>>\s*BDC)(.*?)(EMC\s+)/s',
                    '${3}',
                    $body.' '
                );
            }
        }

        // Extract 'do' commands.
        $hasDoCommands = preg_match_all(
            '/(\/[A-Za-z0-9\.\-_]+\s+Do)\s/s',
            $masked,
            $calls,
            PREG_OFFSET_CAPTURE
        );
        if ($hasDoCommands) {
            foreach ($calls[1] as $call) {
                $result[] = substr($padded, $call[1], \strlen($call[0]));
            }
        }

        return $result;
    }
233
234
    /**
     * Extracts the text drawn by this object's content stream as one string.
     *
     * The stream is split into sections (getSectionsText()) and commands
     * (getCommandsText()). Text-showing operators (', Tj, TJ) are decoded with
     * the current font; positioning operators (Td, TD, Tm, T*, TL, Tz) insert
     * newline, tab or space separators based on simple heuristics; Do recurses
     * into the referenced XObject. All other operators contribute nothing.
     *
     * The object's id stays on self::$recursionStack while it is being
     * processed so that XObjects referencing each other are not expanded
     * endlessly.
     *
     * @param Page|null $page page used to resolve fonts (Tf) and XObjects (Do);
     *                        when null neither lookup is performed
     *
     * @return string extracted text followed by one trailing space
     *
     * @throws \Exception
     */
    public function getText(Page $page = null)
    {
        $text = '';
        $sections = $this->getSectionsText($this->content);
        $current_font = null;
        $current_font_size = 1;

        // Until a Tf operator selects a font, use the first font of the document.
        foreach ($this->document->getObjects() as $obj) {
            if ($obj instanceof Font) {
                $current_font = $obj;
                break;
            }
        }

        if (null === $current_font) {
            $current_font = new Font($this->document);
        }

        $current_position_td = ['x' => false, 'y' => false];
        $current_position_tm = ['x' => false, 'y' => false];

        self::$recursionStack[] = $this->getUniqueId();

        // The stack is static: it must be unwound even when decoding throws,
        // otherwise this object would stay marked as "in progress".
        try {
            foreach ($sections as $section) {
                foreach ($this->getCommandsText($section) as $command) {
                    switch ($command[self::OPERATOR]) {
                        // move text current point
                        case 'Td':
                            $args = preg_split('/\s/s', $command[self::COMMAND]) ?: [];
                            $y = array_pop($args);
                            $x = array_pop($args);

                            // max char size is the minimum horizontal offset that counts as a gap
                            $fontMaxWidth = $this->estimateMaxGlyphWidth($current_font, $current_font_size);

                            if (((float) $x <= 0) ||
                                (false !== $current_position_td['y'] && (float) $y < (float) $current_position_td['y'])
                            ) {
                                // vertical offset
                                $text .= "\n";
                            } elseif (false !== $current_position_td['x']
                                && ((float) $x - (float) $current_position_td['x']) > $fontMaxWidth
                            ) {
                                // horizontal offset
                                $text .= ' ';
                            }
                            $current_position_td = ['x' => $x, 'y' => $y];
                            break;

                        // move text current point and set leading
                        case 'TD':
                            $args = preg_split('/\s/s', $command[self::COMMAND]) ?: [];
                            $y = array_pop($args);
                            $x = array_pop($args);
                            if ((float) $y < 0) {
                                $text .= "\n";
                            } elseif ((float) $x <= 0) {
                                $text .= ' ';
                            }
                            break;

                        // select font and size
                        case 'Tf':
                            $args = array_pad(preg_split('/\s/s', $command[self::COMMAND]) ?: [], 2, null);
                            $current_font_size = (float) $args[1];
                            if (null !== $page) {
                                $current_font = $page->getFont(trim((string) $args[0], '/'));
                            }
                            break;

                        case "'":
                        case 'Tj':
                            // Give a single string the same shape as a TJ array.
                            $command[self::COMMAND] = [$command];
                            // no break
                        case 'TJ':
                            // The font is null only when the page lookup above returned null.
                            if (null === $current_font) {
                                // Fallback
                                // TODO : Improve
                                $text .= $command[self::COMMAND][0][self::COMMAND];
                                break;
                            }

                            $text .= $current_font->decodeText($command[self::COMMAND]);
                            break;

                        // set leading
                        case 'TL':
                            $text .= ' ';
                            break;

                        // set text matrix
                        case 'Tm':
                            $args = preg_split('/\s/s', $command[self::COMMAND]) ?: [];
                            $y = array_pop($args);
                            $x = array_pop($args);
                            if (false !== $current_position_tm['x']
                                && abs((float) $x - (float) $current_position_tm['x']) > 10
                            ) {
                                $text .= "\t";
                            }
                            if (false !== $current_position_tm['y']
                                && abs((float) $y - (float) $current_position_tm['y']) > 10
                            ) {
                                $text .= "\n";
                            }
                            $current_position_tm = ['x' => $x, 'y' => $y];
                            break;

                        // set horizontal scaling
                        case 'Tz':
                        // move to start of next line
                        case 'T*':
                            $text .= "\n";
                            break;

                        // draw an XObject
                        case 'Do':
                            if (null !== $page) {
                                $args = preg_split('/\s/s', $command[self::COMMAND]) ?: [];
                                $id = trim((string) array_pop($args), '/ ');
                                $xobject = $page->getXObject($id);

                                // @todo $xobject could be a ElementXRef object, which would then throw an error
                                if ($xobject instanceof self
                                    && !\in_array($xobject->getUniqueId(), self::$recursionStack, true)
                                ) {
                                    // Not a circular reference.
                                    $text .= $xobject->getText($page);
                                }
                            }
                            break;

                        // Tc, Ts, Tw, Da, rg, RG, re, co, cs, gs, en, sc, SC,
                        // g, G, V, vo, Vo and unknown operators produce no text.
                        default:
                    }
                }
            }
        } finally {
            array_pop(self::$recursionStack);
        }

        return $text.' ';
    }

    /**
     * Estimates the widest glyph of a font, scaled to the given font size.
     *
     * Reads the font details, switching to the first entry of
     * 'DescendantFonts' when that key is present. Uses the largest 'Widths'
     * value (divided by 1000) and, when 'Type' is 'CIDFontType2'
     * (case-insensitive), 'DW' or the font size itself.
     *
     * @param Font|null $font     font to inspect
     * @param float|int $fontSize current font size
     *
     * @return float|int 0 when no font or no width information is available
     */
    private function estimateMaxGlyphWidth($font, $fontSize)
    {
        if (null === $font) {
            return 0;
        }

        $details = $font->getDetails();

        // type 0
        if (isset($details['DescendantFonts'])) {
            $details = isset($details['DescendantFonts'][0]) ? $details['DescendantFonts'][0] : [];
        }
        if (!\is_array($details)) {
            $details = [];
        }

        $maxWidth = 0;

        // type 1
        if (isset($details['Widths'])) {
            foreach ($details['Widths'] as $width) {
                if ((float) $width > $maxWidth) {
                    $maxWidth = (float) $width;
                }
            }

            $maxWidth = ($maxWidth / 1000) * $fontSize;
        }

        // CIDFontType2
        if (isset($details['Type'])
            && \is_string($details['Type'])
            && 'cidfonttype2' === strtolower($details['Type'])
        ) {
            if (isset($details['DW']) && $details['DW']) {
                $maxWidth = ((float) $details['DW'] / 1000) * $fontSize;
            } else {
                // default
                $maxWidth = $fontSize;
            }
        }

        return $maxWidth;
    }
462
463
    /**
     * Extracts the text of this object's content stream as a list of fragments.
     *
     * One entry is appended per text-showing operator (', Tj, TJ), decoded with
     * the current font, and one entry per Do operator holding the text of the
     * referenced XObject. Positioning operators are ignored, so no separators
     * are inserted.
     *
     * @param Page|null $page page used to resolve fonts (Tf) and XObjects (Do);
     *                        when null neither lookup is performed
     *
     * @return array list of text fragments in stream order
     *
     * @throws \Exception
     */
    public function getTextArray(Page $page = null)
    {
        $text = [];
        $sections = $this->getSectionsText($this->content);
        $current_font = new Font($this->document);

        foreach ($sections as $section) {
            foreach ($this->getCommandsText($section) as $command) {
                switch ($command[self::OPERATOR]) {
                    // select font
                    case 'Tf':
                        // Without a page there is nothing to resolve the font
                        // name against; keep the current font.
                        if (null !== $page) {
                            $args = preg_split('/\s/s', $command[self::COMMAND]) ?: [''];
                            $current_font = $page->getFont(trim((string) $args[0], '/'));
                        }
                        break;

                    case "'":
                    case 'Tj':
                        // Give a single string the same shape as a TJ array.
                        $command[self::COMMAND] = [$command];
                        // no break
                    case 'TJ':
                        // The font is null only when the page lookup above returned null.
                        if (null === $current_font) {
                            // Fallback
                            // TODO : Improve
                            $text[] = $command[self::COMMAND][0][self::COMMAND];
                            break;
                        }

                        $text[] = $current_font->decodeText($command[self::COMMAND]);
                        break;

                    // draw an XObject
                    case 'Do':
                        if (null !== $page) {
                            $args = preg_split('/\s/s', $command[self::COMMAND]) ?: [];
                            $id = trim((string) array_pop($args), '/ ');
                            $xobject = $page->getXObject($id);

                            // Only PDFObject instances provide getText().
                            if ($xobject instanceof self) {
                                $text[] = $xobject->getText($page);
                            }
                        }
                        break;

                    // Tc, Td, TD, TL, Tm, Ts, Tw, Tz, T*, Da, rg, RG, re, co,
                    // cs, gs, en, sc, SC, g, G, V, vo, Vo and unknown
                    // operators produce no text.
                    default:
                }
            }
        }

        return $text;
    }
595
596
    /**
     * Tokenises a section of a content stream into a list of commands.
     *
     * Each command is an array with three keys: self::TYPE (the character that
     * introduced the operand: '/', '[', '<', '(', 'n' for bare numbers, or ''),
     * self::OPERATOR (the operator that follows, possibly '') and
     * self::COMMAND (the operand text, or a nested command list for '[').
     *
     * Parsing stops at the end of the text, at a closing ']', ')' or '>', at
     * "ET", or at the first position nothing recognisable starts.
     *
     * @param string $text_part text to tokenise
     * @param int    $offset    position to start at; updated to the position
     *                          where parsing stopped (used by the recursion
     *                          for '[' ... ']' arrays)
     *
     * @return array list of commands
     */
    public function getCommandsText($text_part, &$offset = 0)
    {
        $commands = $matches = [];
        $length = \strlen($text_part);

        while ($offset < $length) {
            // Skip white-space: NUL, TAB, LF, FF, CR, SPACE.
            $offset += strspn($text_part, "\x00\x09\x0a\x0c\x0d\x20", $offset);
            if ($offset >= $length) {
                // Only trailing white-space was left; nothing more to read.
                break;
            }

            $char = $text_part[$offset];

            $operator = '';
            $type = '';
            $command = false;

            switch ($char) {
                case '/':
                    // name, optionally followed by a number, then an operator
                    $type = $char;
                    $rest = substr($text_part, $offset);
                    if (preg_match('/^\/([A-Z0-9\._,\+]+\s+[0-9.\-]+)\s+([A-Z]+)\s*/si', $rest, $matches)
                        || preg_match('/^\/([A-Z0-9\._,\+]+)\s+([A-Z]+)\s*/si', $rest, $matches)
                    ) {
                        $operator = $matches[2];
                        $command = $matches[1];
                        $offset += \strlen($matches[0]);
                    }
                    break;

                case '[':
                    // array object: its elements are parsed recursively, the
                    // recursion returns once it has consumed the closing ']'
                    $type = $char;
                    ++$offset;
                    $command = $this->getCommandsText($text_part, $offset);

                    if (preg_match('/^\s*[A-Z]{1,2}\s*/si', substr($text_part, $offset), $matches)) {
                        $operator = trim($matches[0]);
                        $offset += \strlen($matches[0]);
                    }
                    break;

                case ']':
                    // end of the array being parsed: consume it and stop
                    $type = $char;
                    ++$offset;
                    break;

                case '<':
                case '>':
                    // hexadecimal string
                    $type = $char;
                    ++$offset;
                    if ('<' == $char) {
                        $strpos = strpos($text_part, '>', $offset);
                        if (false === $strpos) {
                            // Unterminated string: take the remainder. Using
                            // the false result as a position would move the
                            // offset backwards and never terminate.
                            $command = (string) substr($text_part, $offset);
                            $offset = $length;
                        } else {
                            $command = substr($text_part, $offset, ($strpos - $offset));
                            $offset = $strpos + 1;
                        }
                    }

                    if (preg_match('/^\s*[A-Z]{1,2}\s*/si', substr($text_part, $offset), $matches)) {
                        $operator = trim($matches[0]);
                        $offset += \strlen($matches[0]);
                    }
                    break;

                case '(':
                case ')':
                    // literal string
                    ++$offset;
                    $type = $char;
                    $strpos = $offset;
                    if ('(' == $char) {
                        // Find the matching ')' honouring nesting and escapes.
                        $open_bracket = 1;
                        while ($open_bracket > 0) {
                            if (!isset($text_part[$strpos])) {
                                break;
                            }
                            $ch = $text_part[$strpos];
                            switch ($ch) {
                                case '\\':
                                    // REVERSE SOLIDUS (5Ch) (Backslash)
                                    // skip next character
                                    ++$strpos;
                                    break;

                                case '(':
                                    // LEFT PARENTHESIS (28h)
                                    ++$open_bracket;
                                    break;

                                case ')':
                                    // RIGHT PARENTHESIS (29h)
                                    --$open_bracket;
                                    break;
                            }
                            ++$strpos;
                        }
                        $command = substr($text_part, $offset, ($strpos - $offset - 1));
                        $offset = $strpos;

                        if (preg_match('/^\s*([A-Z\']{1,2})\s*/si', substr($text_part, $offset), $matches)) {
                            $operator = $matches[1];
                            $offset += \strlen($matches[0]);
                        }
                    }
                    break;

                default:
                    $rest = substr($text_part, $offset);
                    if ('ET' === substr($text_part, $offset, 2)) {
                        // end of text block: leave $command false to stop
                        break;
                    } elseif (preg_match(
                        '/^\s*(?P<data>([0-9\.\-]+\s*?)+)\s+(?P<id>[A-Z]{1,3})\s*/si',
                        $rest,
                        $matches
                    )) {
                        // numbers followed by an operator
                        $operator = trim($matches['id']);
                        $command = trim($matches['data']);
                        $offset += \strlen($matches[0]);
                    } elseif (preg_match('/^\s*([0-9\.\-]+\s*?)+\s*/si', $rest, $matches)) {
                        // bare numbers (e.g. kerning values inside an array)
                        $type = 'n';
                        $command = trim($matches[0]);
                        $offset += \strlen($matches[0]);
                    } elseif (preg_match('/^\s*([A-Z\*]+)\s*/si', $rest, $matches)) {
                        // operator without operands
                        $type = '';
                        $operator = $matches[1];
                        $command = '';
                        $offset += \strlen($matches[0]);
                    }
            }

            if (false === $command) {
                // Nothing was recognised at this position.
                break;
            }

            $commands[] = [
                self::TYPE => $type,
                self::OPERATOR => $operator,
                self::COMMAND => $command,
            ];
        }

        return $commands;
    }
753
754
    /**
     * Instantiates the class matching the 'Type' (and, for XObject and Font,
     * the 'Subtype') entry of the header.
     *
     * Fonts map to \Smalot\PdfParser\Font\Font<Subtype> when such a class
     * exists and to Font otherwise. Any type without a dedicated class yields
     * a plain PDFObject.
     *
     * @param Document $document owning document, passed to the created object
     * @param Header   $header   header inspected for 'Type' / 'Subtype'
     * @param string   $content  raw content, passed to the created object
     *
     * @return PDFObject
     */
    public static function factory(Document $document, Header $header, $content)
    {
        $type = $header->get('Type')->getContent();

        if ('XObject' == $type) {
            $subtype = $header->get('Subtype')->getContent();

            if ('Image' == $subtype) {
                return new Image($document, $header, $content);
            }

            if ('Form' == $subtype) {
                return new Form($document, $header, $content);
            }

            return new self($document, $header, $content);
        }

        if ('Pages' == $type) {
            return new Pages($document, $header, $content);
        }

        if ('Page' == $type) {
            return new Page($document, $header, $content);
        }

        if ('Encoding' == $type) {
            return new Encoding($document, $header, $content);
        }

        if ('Font' == $type) {
            $fontClass = '\Smalot\PdfParser\Font\Font'.$header->get('Subtype')->getContent();

            return class_exists($fontClass)
                ? new $fontClass($document, $header, $content)
                : new Font($document, $header, $content);
        }

        return new self($document, $header, $content);
    }
796
797
    /**
     * Returns an id for this instance, taken from spl_object_hash().
     *
     * @return string
     */
    protected function getUniqueId()
    {
        $hash = spl_object_hash($this);

        return $hash;
    }
806
}
807