Passed
Pull Request — master (#318)
by Konrad
03:42
created

PDFObject::getText()   F

Complexity

Conditions 60
Paths 912

Size

Total Lines 220
Code Lines 137

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 121
CRAP Score 65.7362

Importance

Changes 1
Bugs 1 Features 0
Metric Value
cc 60
eloc 137
c 1
b 1
f 0
nc 912
nop 1
dl 0
loc 220
rs 0.0977
ccs 121
cts 137
cp 0.8832
crap 65.7362

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
/**
4
 * @file
5
 *          This file is part of the PdfParser library.
6
 *
7
 * @author  Sébastien MALOT <[email protected]>
8
 * @date    2017-01-03
9
 *
10
 * @license LGPLv3
11
 * @url     <https://github.com/smalot/pdfparser>
12
 *
13
 *  PdfParser is a pdf library written in PHP, extraction oriented.
14
 *  Copyright (C) 2017 - Sébastien MALOT <[email protected]>
15
 *
16
 *  This program is free software: you can redistribute it and/or modify
17
 *  it under the terms of the GNU Lesser General Public License as published by
18
 *  the Free Software Foundation, either version 3 of the License, or
19
 *  (at your option) any later version.
20
 *
21
 *  This program is distributed in the hope that it will be useful,
22
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
23
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
24
 *  GNU Lesser General Public License for more details.
25
 *
26
 *  You should have received a copy of the GNU Lesser General Public License
27
 *  along with this program.
28
 *  If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
29
 */
30
31
namespace Smalot\PdfParser;
32
33
use Smalot\PdfParser\XObject\Form;
34
use Smalot\PdfParser\XObject\Image;
35
36
/**
37
 * Class PDFObject
38
 */
39
class PDFObject
40
{
41
    const TYPE = 't';
42
43
    const OPERATOR = 'o';
44
45
    const COMMAND = 'c';
46
47
    /**
48
     * The recursion stack.
49
     *
50
     * @var array
51
     */
52
    public static $recursionStack = [];
53
54
    /**
55
     * @var Document
56
     */
57
    protected $document = null;
58
59
    /**
60
     * @var Header
61
     */
62
    protected $header = null;
63
64
    /**
65
     * @var string
66
     */
67
    protected $content = null;
68
69
    /**
     * Creates an object attached to the given document.
     *
     * @param Document    $document document stored on the object
     * @param Header|null $header   header of the object; a new empty Header is created when null
     * @param string|null $content  content of the object, stored as given
     */
    public function __construct(Document $document, Header $header = null, $content = null)
    {
        if (null === $header) {
            $header = new Header();
        }

        $this->document = $document;
        $this->header = $header;
        $this->content = $content;
    }
79
80 29
    /**
     * Initialization hook. This implementation does nothing.
     */
    public function init()
    {
        // Nothing to initialize here.
    }
83
84
    /**
     * Returns the header stored on this object.
     *
     * @return Header|null
     */
    public function getHeader()
    {
        $header = $this->header;

        return $header;
    }
91
92
    /**
     * Looks up an entry of the header by name.
     *
     * The call is forwarded unchanged to Header::get().
     *
     * @param string $name name of the header entry
     *
     * @return Element|PDFObject
     */
    public function get($name)
    {
        $header = $this->header;

        return $header->get($name);
    }
101
102
    /**
     * Tells whether the header contains an entry with the given name.
     *
     * The call is forwarded unchanged to Header::has().
     *
     * @param string $name name of the header entry
     *
     * @return bool
     */
    public function has($name)
    {
        $header = $this->header;

        return $header->has($name);
    }
111
112
    /**
     * Returns the details of the header.
     *
     * The call is forwarded unchanged to Header::getDetails().
     *
     * @param bool $deep passed through to Header::getDetails()
     *
     * @return array
     */
    public function getDetails($deep = true)
    {
        $header = $this->header;

        return $header->getDetails($deep);
    }
121
122
    /**
     * Returns the content given to the constructor.
     *
     * @return string|null
     */
    public function getContent()
    {
        $content = $this->content;

        return $content;
    }
129
130
    /**
     * Masks the parts of a content stream that are not operators.
     *
     * Escaped backslashes/parentheses, inline image blocks (BI ... ID ... EI),
     * the inside of [...] and (...) operands, everything enclosed in < >,
     * BDC markers and EMC markers are overwritten with the mask character.
     * Every replacement has the same byte length as the text it replaces, so an
     * offset found in the returned string is also valid in $content.
     *
     * @param string $content content stream to mask
     * @param string $char    mask character; only its first byte is used and
     *                        an empty value falls back to 'X'
     *
     * @return string masked copy of $content
     */
    public function cleanContent($content, $char = 'X')
    {
        // An empty mask would shorten the string and break offset alignment.
        $char = (string) $char;
        $char = '' === $char ? 'X' : substr($char, 0, 1);

        // Overwrites every capture of $group with the mask, keeping its length.
        // A failed or empty match leaves the subject untouched.
        $mask = static function ($subject, $pattern, $group) use ($char) {
            if (!preg_match_all($pattern, $subject, $found, PREG_OFFSET_CAPTURE) || !isset($found[$group])) {
                return $subject;
            }

            foreach ($found[$group] as $capture) {
                $length = \strlen($capture[0]);
                $subject = substr_replace($subject, str_repeat($char, $length), $capture[1], $length);
            }

            return $subject;
        };

        // Hide escaped backslashes and parentheses (two bytes each) so they
        // cannot be mistaken for string delimiters below.
        $content = str_replace(['\\\\', '\\)', '\\('], $char.$char, $content);

        // Remove image block with binary content.
        $content = $mask($content, '/\s(BI\s.*?(\sID\s).*?(\sEI))\s/s', 0);

        // Clean content in square brackets [.....]
        $content = $mask($content, '/\[((\(.*?\)|[0-9\.\-\s]*)*)\]/s', 1);

        // Clean content in round brackets (.....)
        $content = $mask($content, '/\((.*?)\)/s', 1);

        // Clean structure: mask everything from an opening '<' up to and
        // including its matching '>', tracking the nesting depth.
        $parts = preg_split('/(<|>)/s', $content, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE);
        if ($parts) {
            $content = '';
            $level = 0;
            foreach ($parts as $part) {
                if ('<' === $part) {
                    ++$level;
                }

                $content .= (0 == $level ? $part : str_repeat($char, \strlen($part)));

                if ('>' === $part) {
                    --$level;
                }
            }
        }

        // Clean BDC and EMC markup. The mask character is quoted together with
        // the '/' delimiter so that any mask yields a valid pattern.
        $content = $mask($content, '/(\/[A-Za-z0-9\_]*\s*'.preg_quote($char, '/').'*BDC)/s', 1);
        $content = $mask($content, '/\s(EMC)\s/s', 1);

        return $content;
    }
191
192
    /**
     * Splits a content stream into the sections that getText() interprets.
     *
     * Two kinds of sections are returned, in this order: first the body of
     * every BT ... ET text block (with BDC/EMC marked-content wrappers removed),
     * then every "/Name Do" command. Matching is done on the masked copy
     * produced by cleanContent(), and the matched offsets are used to cut the
     * section out of the unmasked content.
     *
     * @param string $content content stream
     *
     * @return array list of section strings
     */
    public function getSectionsText($content)
    {
        $sections = [];

        // Pad both ends so that the white-space anchors of the patterns below
        // can also match at the very start and end of the stream.
        $content = ' '.$content.' ';
        $textCleaned = $this->cleanContent($content, '_');

        // Extract text blocks.
        if (preg_match_all('/\s+BT[\s|\(|\[]+(.*?)\s*ET/s', $textCleaned, $matches, PREG_OFFSET_CAPTURE)) {
            foreach ($matches[1] as $part) {
                list($masked, $offset) = $part;
                if ('' === $masked) {
                    continue;
                }

                $section = substr($content, $offset, \strlen($masked)).' ';

                // Removes BDC and EMC markup.
                $stripped = preg_replace('/(\/[A-Za-z0-9]+\s*<<.*?)(>>\s*BDC)(.*?)(EMC\s+)/s', '${3}', $section);

                // preg_replace() returns null on a PCRE failure; keep the
                // unstripped section instead of storing a null entry.
                $sections[] = null !== $stripped ? $stripped : $section;
            }
        }

        // Extract 'do' commands.
        if (preg_match_all('/(\/[A-Za-z0-9\.\-_]+\s+Do)\s/s', $textCleaned, $matches, PREG_OFFSET_CAPTURE)) {
            foreach ($matches[1] as $part) {
                list($masked, $offset) = $part;
                $sections[] = substr($content, $offset, \strlen($masked));
            }
        }

        return $sections;
    }
233
234
    /**
     * Extracts the text of this object's content as a single string.
     *
     * The content is split with getSectionsText() and parsed with
     * getCommandsText(); operators are then handled as follows:
     *  - Tj, ' and TJ: operand decoded with the current font and appended;
     *  - Tf: selects the font size and, when $page is given, the font;
     *  - Td, TD, Tm, T*, TL, Tz: append a line break, tab or space depending
     *    on the operator and the change of position;
     *  - Do: appends the text of the XObject of $page with that name, unless
     *    it is already being processed (circular reference);
     *  - every other operator is ignored.
     *
     * @param Page|null $page page used to resolve fonts and XObjects by name;
     *                        without it fonts are not switched and Do is ignored
     *
     * @return string extracted text followed by one trailing space
     *
     * @throws \Exception
     */
    public function getText(Page $page = null)
    {
        $text = '';
        $sections = $this->getSectionsText($this->content);
        $current_font = null;
        $current_font_size = 1;

        // Until a Tf operator selects one, use the first font of the document.
        foreach ($this->document->getObjects() as $obj) {
            if ($obj instanceof Font) {
                $current_font = $obj;
                break;
            }
        }

        if (null === $current_font) {
            $current_font = new Font($this->document);
        }

        $current_position_td = ['x' => false, 'y' => false];
        $current_position_tm = ['x' => false, 'y' => false];

        self::$recursionStack[] = $this->getUniqueId();

        // The stack is static: it must be unwound even when decoding throws,
        // otherwise later calls would treat this object as still in progress.
        try {
            foreach ($sections as $section) {
                foreach ($this->getCommandsText($section) as $command) {
                    switch ($command[self::OPERATOR]) {
                        // move text current point
                        case 'Td':
                            $args = $this->splitOperands($command[self::COMMAND]);
                            $y = array_pop($args);
                            $x = array_pop($args);

                            // widest glyph, used as minimum for a horizontal offset
                            $fontMaxWidth = $this->estimateMaxGlyphWidth($current_font, $current_font_size);

                            $movedDown = false !== $current_position_td['y']
                                && (float) $y < (float) $current_position_td['y'];
                            $movedRight = false !== $current_position_td['x']
                                && ((float) $x - (float) $current_position_td['x']) > $fontMaxWidth;

                            if ((float) $x <= 0 || $movedDown) {
                                // vertical offset
                                $text .= "\n";
                            } elseif ($movedRight) {
                                // horizontal offset
                                $text .= ' ';
                            }
                            $current_position_td = ['x' => $x, 'y' => $y];
                            break;

                        // move text current point and set leading
                        case 'TD':
                            $args = $this->splitOperands($command[self::COMMAND]);
                            $y = array_pop($args);
                            $x = array_pop($args);
                            if ((float) $y < 0) {
                                $text .= "\n";
                            } elseif ((float) $x <= 0) {
                                $text .= ' ';
                            }
                            break;

                        // select font and size
                        case 'Tf':
                            $args = $this->splitOperands($command[self::COMMAND]);
                            $id = isset($args[0]) ? trim($args[0], '/') : '';
                            // A missing size operand counts as 0.
                            $current_font_size = isset($args[1]) ? (float) $args[1] : 0.0;
                            if (null !== $page) {
                                $current_font = $page->getFont($id);
                            }
                            break;

                        case "'":
                        case 'Tj':
                            // Wrap the single string so it has the same shape as a TJ operand.
                            $command[self::COMMAND] = [$command];
                            // no break
                        case 'TJ':
                            // Without a font the first operand is appended undecoded.
                            if (null === $current_font) {
                                if (isset($command[self::COMMAND][0][self::COMMAND])) {
                                    $text .= $command[self::COMMAND][0][self::COMMAND];
                                }
                                break;
                            }

                            $text .= $current_font->decodeText($command[self::COMMAND]);
                            break;

                        // set leading
                        case 'TL':
                            $text .= ' ';
                            break;

                        // set text matrix
                        case 'Tm':
                            $args = $this->splitOperands($command[self::COMMAND]);
                            $y = array_pop($args);
                            $x = array_pop($args);
                            if (false !== $current_position_tm['x']
                                && abs((float) $x - (float) $current_position_tm['x']) > 10
                            ) {
                                $text .= "\t";
                            }
                            if (false !== $current_position_tm['y']
                                && abs((float) $y - (float) $current_position_tm['y']) > 10
                            ) {
                                $text .= "\n";
                            }
                            $current_position_tm = ['x' => $x, 'y' => $y];
                            break;

                        // set horizontal scaling
                        case 'Tz':
                        // move to start of next line
                        case 'T*':
                            $text .= "\n";
                            break;

                        case 'Do':
                            if (null !== $page) {
                                $args = $this->splitOperands($command[self::COMMAND]);
                                $id = trim((string) array_pop($args), '/ ');
                                $xobject = $page->getXObject($id);

                                // Anything that is not a PDFObject is skipped, and so is an
                                // object that is already on the stack (circular reference).
                                if ($xobject instanceof self
                                    && !\in_array($xobject->getUniqueId(), self::$recursionStack, true)
                                ) {
                                    $text .= $xobject->getText($page);
                                }
                            }
                            break;

                        // Tc, Ts, Tw, Da, rg, RG, re, co, cs, gs, en, sc, SC,
                        // g, G, V, vo, Vo and any other operator add no text.
                        default:
                    }
                }
            }
        } finally {
            array_pop(self::$recursionStack);
        }

        return $text.' ';
    }

    /**
     * Splits the operand string of a command on single white-space characters.
     *
     * @param mixed $operands operand part of a parsed command
     *
     * @return array operand strings; empty when $operands is not a string or cannot be split
     */
    private function splitOperands($operands)
    {
        $parts = \is_string($operands) ? preg_split('/\s/s', $operands) : false;

        return false === $parts ? [] : $parts;
    }

    /**
     * Estimates the width of the widest glyph of a font at the given size.
     *
     * Uses the largest entry of the font's Widths (of its first descendant
     * font when DescendantFonts is present), scaled by size / 1000. For a
     * CIDFontType2 dictionary the DW entry is used instead, or the font size
     * itself when DW is absent or empty.
     *
     * @param Font|null $font     font to inspect; null yields 0
     * @param float|int $fontSize current font size
     *
     * @return float|int estimated width, 0 when nothing is known
     */
    private function estimateMaxGlyphWidth($font, $fontSize)
    {
        $maxWidth = 0;
        if (null === $font) {
            return $maxWidth;
        }

        $dictionary = $font->getDetails();

        // type 0
        if (isset($dictionary['DescendantFonts'])) {
            $dictionary = isset($dictionary['DescendantFonts'][0]) ? $dictionary['DescendantFonts'][0] : [];
        }

        // type 1
        if (isset($dictionary['Widths'])) {
            foreach ($dictionary['Widths'] as $width) {
                if ((float) $width > $maxWidth) {
                    $maxWidth = (float) $width;
                }
            }

            $maxWidth = ($maxWidth / 1000) * $fontSize;
        }

        // CIDFontType2
        if (isset($dictionary['Type']) && 'cidfonttype2' === strtolower($dictionary['Type'])) {
            if (isset($dictionary['DW']) && $dictionary['DW']) {
                $maxWidth = ((float) $dictionary['DW'] / 1000) * $fontSize;
            } else {
                // default
                $maxWidth = $fontSize;
            }
        }

        return $maxWidth;
    }
462
463
    /**
     * Extracts the text of this object's content as a list of fragments.
     *
     * One entry is added for every text-showing operator (Tj, ' and TJ),
     * decoded with the current font, and one for every Do operator whose
     * XObject can be resolved through $page (the string returned by that
     * object's getText()). Positioning and every other operator add nothing.
     *
     * @param Page|null $page page used to resolve fonts and XObjects by name;
     *                        without it fonts are not switched and Do is ignored
     *
     * @return array list of text fragments
     *
     * @throws \Exception
     */
    public function getTextArray(Page $page = null)
    {
        $text = [];
        $sections = $this->getSectionsText($this->content);
        $current_font = new Font($this->document);

        foreach ($sections as $section) {
            foreach ($this->getCommandsText($section) as $command) {
                switch ($command[self::OPERATOR]) {
                    // select font
                    case 'Tf':
                        // Fonts can only be looked up on a page; keep the
                        // current font when none was given.
                        if (null !== $page) {
                            $args = preg_split('/\s/s', $command[self::COMMAND]);
                            $id = (false !== $args && isset($args[0])) ? trim($args[0], '/') : '';
                            $current_font = $page->getFont($id);
                        }
                        break;

                    case "'":
                    case 'Tj':
                        // Wrap the single string so it has the same shape as a TJ operand.
                        $command[self::COMMAND] = [$command];
                        // no break
                    case 'TJ':
                        // Without a font the first operand is stored undecoded.
                        if (null === $current_font) {
                            if (isset($command[self::COMMAND][0][self::COMMAND])) {
                                $text[] = $command[self::COMMAND][0][self::COMMAND];
                            }
                            break;
                        }

                        $text[] = $current_font->decodeText($command[self::COMMAND]);
                        break;

                    case 'Do':
                        if (null !== $page) {
                            $args = preg_split('/\s/s', $command[self::COMMAND]);
                            $id = trim((string) (false !== $args ? array_pop($args) : ''), '/ ');
                            $xobject = $page->getXObject($id);

                            // Same guard as getText(): only PDFObjects have getText(), and an
                            // object already on the recursion stack is a circular reference.
                            if ($xobject instanceof self
                                && !\in_array($xobject->getUniqueId(), self::$recursionStack, true)
                            ) {
                                $text[] = $xobject->getText($page);
                            }
                        }
                        break;

                    // Tc, Td, TD, TL, Tm, Ts, Tw, Tz, T*, Da, rg, RG, re, co,
                    // cs, gs, en, sc, SC, g, G, V, vo, Vo and any other
                    // operator add no fragment.
                    default:
                }
            }
        }

        return $text;
    }
595
596
    /**
     * Parses a piece of content stream into a list of commands.
     *
     * Each command is an array with three keys: self::TYPE (the delimiter that
     * introduced the operand: '/', '[', '<', '(', 'n' for a bare number, or ''),
     * self::OPERATOR (the operator that follows, '' when none was found) and
     * self::COMMAND (the operand: a string, or a nested command list for '[').
     *
     * Parsing stops at the end of the input, at "ET", at a closing ']', '>' or
     * ')' and at anything that cannot be recognized. A closing ']' is how the
     * recursive call made for '[' hands control back to its caller.
     *
     * @param string $text_part text to parse
     * @param int    $offset    position to start from; updated to the position
     *                          reached when parsing stops
     *
     * @return array list of commands
     */
    public function getCommandsText($text_part, &$offset = 0)
    {
        $commands = $matches = [];
        $length = \strlen($text_part);

        while ($offset < $length) {
            // Skip NUL, tab, line feed, form feed, carriage return and space.
            $offset += strspn($text_part, "\x00\x09\x0a\x0c\x0d\x20", $offset);
            if ($offset >= $length) {
                // Only white space was left: nothing more to read.
                break;
            }
            $char = $text_part[$offset];

            $operator = '';
            $type = '';
            $command = false;

            switch ($char) {
                case '/':
                    $type = $char;
                    $rest = substr($text_part, $offset);
                    // "/Name number OP" first, then "/Name OP".
                    if (preg_match('/^\/([A-Z0-9\._,\+]+\s+[0-9.\-]+)\s+([A-Z]+)\s*/si', $rest, $matches)
                        || preg_match('/^\/([A-Z0-9\._,\+]+)\s+([A-Z]+)\s*/si', $rest, $matches)
                    ) {
                        $operator = $matches[2];
                        $command = $matches[1];
                        $offset += \strlen($matches[0]);
                    }
                    break;

                case '[':
                    // array object: its elements are parsed recursively
                    $type = $char;
                    ++$offset;
                    $command = $this->getCommandsText($text_part, $offset);

                    if (preg_match('/^\s*[A-Z]{1,2}\s*/si', substr($text_part, $offset), $matches)) {
                        $operator = trim($matches[0]);
                        $offset += \strlen($matches[0]);
                    }
                    break;

                case ']':
                    // end of array: step over it; $command stays false, which ends this call
                    $type = $char;
                    ++$offset;
                    break;

                case '<':
                case '>':
                    // angle-bracket delimited operand
                    $type = $char;
                    ++$offset;
                    if ('<' == $char) {
                        $strpos = strpos($text_part, '>', $offset);
                        if (false === $strpos) {
                            // Unterminated operand: take the rest of the input. Without
                            // this the offset would be reset to 1 and parsing could
                            // restart from the beginning indefinitely.
                            $command = (string) substr($text_part, $offset);
                            $offset = $length;
                        } else {
                            $command = substr($text_part, $offset, ($strpos - $offset));
                            $offset = $strpos + 1;
                        }
                    }

                    if (preg_match('/^\s*[A-Z]{1,2}\s*/si', substr($text_part, $offset), $matches)) {
                        $operator = trim($matches[0]);
                        $offset += \strlen($matches[0]);
                    }
                    break;

                case '(':
                case ')':
                    ++$offset;
                    $type = $char;
                    if ('(' == $char) {
                        // Find the matching parenthesis, honouring nesting and escapes.
                        $strpos = $offset;
                        $open_bracket = 1;
                        while ($open_bracket > 0 && isset($text_part[$strpos])) {
                            switch ($text_part[$strpos]) {
                                case '\\':
                                    // REVERSE SOLIDUS (5Ch) (Backslash): skip next character
                                    ++$strpos;
                                    break;

                                case '(':
                                    // LEFT PARENTHESIS (28h)
                                    ++$open_bracket;
                                    break;

                                case ')':
                                    // RIGHT PARENTHESIS (29h)
                                    --$open_bracket;
                                    break;
                            }
                            ++$strpos;
                        }
                        // $strpos is one past the closing parenthesis.
                        $command = substr($text_part, $offset, ($strpos - $offset - 1));
                        $offset = $strpos;

                        if (preg_match('/^\s*([A-Z\']{1,2})\s*/si', substr($text_part, $offset), $matches)) {
                            $operator = $matches[1];
                            $offset += \strlen($matches[0]);
                        }
                    }
                    break;

                default:
                    $rest = substr($text_part, $offset);
                    if ('ET' == substr($text_part, $offset, 2)) {
                        // end of text object: $command stays false, which ends parsing
                        break;
                    }

                    if (preg_match('/^\s*(?P<data>([0-9\.\-]+\s*?)+)\s+(?P<id>[A-Z]{1,3})\s*/si', $rest, $matches)) {
                        // numbers followed by an operator
                        $operator = trim($matches['id']);
                        $command = trim($matches['data']);
                        $offset += \strlen($matches[0]);
                    } elseif (preg_match('/^\s*([0-9\.\-]+\s*?)+\s*/si', $rest, $matches)) {
                        // bare numbers
                        $type = 'n';
                        $command = trim($matches[0]);
                        $offset += \strlen($matches[0]);
                    } elseif (preg_match('/^\s*([A-Z\*]+)\s*/si', $rest, $matches)) {
                        // operator without operand
                        $type = '';
                        $operator = $matches[1];
                        $command = '';
                        $offset += \strlen($matches[0]);
                    }
            }

            if (false === $command) {
                break;
            }

            $commands[] = [
                self::TYPE => $type,
                self::OPERATOR => $operator,
                self::COMMAND => $command,
            ];
        }

        return $commands;
    }
753
754
    /**
     * Builds the object class that matches the header's Type (and Subtype).
     *
     * XObject/Image gives Image, XObject/Form gives Form, Pages gives Pages,
     * Page gives Page, Encoding gives Encoding. For Font the class
     * \Smalot\PdfParser\Font\Font<Subtype> is used when it exists, otherwise
     * Font. Everything else, including an XObject with another subtype, gives
     * a plain PDFObject.
     *
     * @param Document $document document handed to the created object
     * @param Header   $header   header that is inspected and handed to the created object
     * @param string   $content  content handed to the created object
     *
     * @return PDFObject
     */
    public static function factory(Document $document, Header $header, $content)
    {
        $type = $header->get('Type')->getContent();

        // The comparisons are deliberately loose (==), as in a switch statement.
        if ('XObject' == $type) {
            $subtype = $header->get('Subtype')->getContent();

            if ('Image' == $subtype) {
                return new Image($document, $header, $content);
            }

            if ('Form' == $subtype) {
                return new Form($document, $header, $content);
            }

            return new self($document, $header, $content);
        }

        if ('Pages' == $type) {
            return new Pages($document, $header, $content);
        }

        if ('Page' == $type) {
            return new Page($document, $header, $content);
        }

        if ('Encoding' == $type) {
            return new Encoding($document, $header, $content);
        }

        if ('Font' == $type) {
            $fontClass = '\Smalot\PdfParser\Font\Font'.$header->get('Subtype')->getContent();

            if (!class_exists($fontClass)) {
                return new Font($document, $header, $content);
            }

            return new $fontClass($document, $header, $content);
        }

        return new self($document, $header, $content);
    }
796
797
    /**
     * Returns an identifier for this object instance.
     *
     * The value is the one produced by spl_object_hash() for this instance.
     *
     * @return string
     */
    protected function getUniqueId()
    {
        $hash = spl_object_hash($this);

        return $hash;
    }
806
}
807