Passed
Push — issue-387 ( 594a65 )
by Sebastien
02:39
created

PDFObject::getText()   F

Complexity

Conditions 51
Paths 116

Size

Total Lines 189
Code Lines 119

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 107
CRAP Score 54.3038

Importance

Changes 1
Bugs 1 Features 0
Metric Value
cc 51
eloc 119
c 1
b 1
f 0
nc 116
nop 1
dl 0
loc 189
ccs 107
cts 120
cp 0.8917
crap 54.3038
rs 3.2266

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
/**
4
 * @file
5
 *          This file is part of the PdfParser library.
6
 *
7
 * @author  Sébastien MALOT <[email protected]>
8
 * @date    2017-01-03
9
 *
10
 * @license LGPLv3
11
 * @url     <https://github.com/smalot/pdfparser>
12
 *
13
 *  PdfParser is a pdf library written in PHP, extraction oriented.
14
 *  Copyright (C) 2017 - Sébastien MALOT <[email protected]>
15
 *
16
 *  This program is free software: you can redistribute it and/or modify
17
 *  it under the terms of the GNU Lesser General Public License as published by
18
 *  the Free Software Foundation, either version 3 of the License, or
19
 *  (at your option) any later version.
20
 *
21
 *  This program is distributed in the hope that it will be useful,
22
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
23
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
24
 *  GNU Lesser General Public License for more details.
25
 *
26
 *  You should have received a copy of the GNU Lesser General Public License
27
 *  along with this program.
28
 *  If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
29
 */
30
31
namespace Smalot\PdfParser;
32
33
use Smalot\PdfParser\XObject\Form;
34
use Smalot\PdfParser\XObject\Image;
35
36
/**
37
 * Class PDFObject
38
 */
39
class PDFObject
40
{
41
    const TYPE = 't';
42
43
    const OPERATOR = 'o';
44
45
    const COMMAND = 'c';
46
47
    /**
48
     * The recursion stack.
49
     *
50
     * @var array
51
     */
52
    public static $recursionStack = [];
53
54
    /**
55
     * @var Document
56
     */
57
    protected $document = null;
58
59
    /**
60
     * @var Header
61
     */
62
    protected $header = null;
63
64
    /**
65
     * @var string
66
     */
67
    protected $content = null;
68
69
    /**
70
     * @var Config
71
     */
72
    protected $config;
73
74
    /**
     * Creates an object attached to the given document.
     *
     * @param Document    $document document stored on the instance
     * @param Header|null $header   header of the object; a new empty Header is created when null is passed
     * @param string|null $content  raw content of the object, stored as given
     * @param Config|null $config   configuration, stored as given
     */
    public function __construct(
        Document $document,
        Header $header = null,
        $content = null,
        Config $config = null
    ) {
        // Guarantee that $this->header is always usable by get()/has()/getDetails().
        if (null === $header) {
            $header = new Header();
        }

        $this->document = $document;
        $this->header = $header;
        $this->content = $content;
        $this->config = $config;
    }
90
91 34
    /**
     * Does nothing in this class; the body is intentionally empty.
     */
    public function init()
    {
        // Intentionally left blank.
    }
94
95
    /**
     * Returns the header held by this object.
     *
     * @return Header|null
     */
    public function getHeader()
    {
        return $this->header;
    }
102
103
    /**
     * Looks up an entry of the header by name; the call is delegated to Header::get().
     *
     * @param string $name name of the header entry
     *
     * @return Element|PDFObject
     */
    public function get($name)
    {
        $header = $this->header;

        return $header->get($name);
    }
112
113
    /**
     * Tells whether the header has an entry with the given name; delegated to Header::has().
     *
     * @param string $name name of the header entry
     *
     * @return bool
     */
    public function has($name)
    {
        $header = $this->header;

        return $header->has($name);
    }
122
123
    /**
     * Returns the details of the header; delegated to Header::getDetails().
     *
     * @param bool $deep forwarded unchanged to Header::getDetails()
     *
     * @return array
     */
    public function getDetails($deep = true)
    {
        $header = $this->header;

        return $header->getDetails($deep);
    }
132
133
    /**
     * Returns the raw content given to the constructor.
     *
     * @return string|null
     */
    public function getContent()
    {
        return $this->content;
    }
140
141
    /**
     * Masks every part of a content stream that is data rather than structure.
     *
     * Escaped backslashes/parentheses, inline images (BI ... ID ... EI), the
     * inside of [...] and (...) operands, everything between < and > (delimiters
     * included) and BDC/EMC markup are overwritten with $char. Each replacement
     * has the same byte length as the text it replaces, so an offset found in
     * the returned string points at the same place in $content.
     *
     * @param string $content content stream to mask
     * @param string $char    masking character; only its first byte is used
     *
     * @return string masked copy of $content
     */
    public function cleanContent($content, $char = 'X')
    {
        $char = $char[0];
        $content = str_replace(['\\\\', '\\)', '\\('], $char.$char, $content);

        // Remove image block with binary content (the whole match is masked).
        $content = $this->maskMatches('/\s(BI\s.*?(\sID\s).*?(\sEI))\s/s', 0, $content, $char);

        // Clean content in square brackets [.....]
        $content = $this->maskMatches('/\[((\(.*?\)|[0-9\.\-\s]*)*)\]/s', 1, $content, $char);

        // Clean content in round brackets (.....)
        $content = $this->maskMatches('/\((.*?)\)/s', 1, $content, $char);

        // Clean structure: everything from a '<' up to its matching '>' is masked,
        // nesting is tracked so that << ... >> dictionaries are covered as well.
        if ($parts = preg_split('/(<|>)/s', $content, -1, \PREG_SPLIT_NO_EMPTY | \PREG_SPLIT_DELIM_CAPTURE)) {
            $content = '';
            $level = 0;
            foreach ($parts as $part) {
                if ('<' == $part) {
                    ++$level;
                }

                $content .= (0 == $level ? $part : str_repeat($char, \strlen($part)));

                if ('>' == $part) {
                    --$level;
                }
            }
        }

        // Clean BDC and EMC markup.
        // The delimiter is passed to preg_quote() so that a '/' masking character
        // cannot terminate the pattern early.
        $content = $this->maskMatches(
            '/(\/[A-Za-z0-9\_]*\s*'.preg_quote($char, '/').'*BDC)/s',
            1,
            $content,
            $char
        );
        $content = $this->maskMatches('/\s(EMC)\s/s', 1, $content, $char);

        return $content;
    }

    /**
     * Overwrites one capture group of every match of $pattern with $char.
     *
     * All matches are located before anything is replaced; this is safe because
     * each replacement keeps the byte length, so the captured offsets stay valid.
     *
     * @param string $pattern PCRE pattern
     * @param int    $group   index of the capture group to mask (0 for the whole match)
     * @param string $content text to process
     * @param string $char    single masking character
     *
     * @return string $content with the selected group masked, or unchanged when nothing matched
     *                or the pattern could not be evaluated
     */
    private function maskMatches($pattern, $group, $content, $char)
    {
        if (!preg_match_all($pattern, $content, $matches, \PREG_OFFSET_CAPTURE) || empty($matches[$group])) {
            return $content;
        }

        foreach ($matches[$group] as $part) {
            $length = \strlen($part[0]);
            $content = substr_replace($content, str_repeat($char, $length), $part[1], $length);
        }

        return $content;
    }
202
203
    /**
     * Splits a content stream into the sections that getCommandsText() can parse.
     *
     * The stream is padded with one space on each side and masked with
     * cleanContent() so that operators are searched in structure only; the
     * sections themselves are cut out of the unmasked text at the same offsets.
     * The result holds every non-empty BT ... ET body first (BDC/EMC markup
     * removed, prefixed with "Q\n" when a Q precedes BT and suffixed with "\nq"
     * when a q follows ET), then every "/Name Do" command.
     *
     * @param string $content raw content stream
     *
     * @return array list of section strings
     */
    public function getSectionsText($content)
    {
        $content = ' '.$content.' ';
        $masked = $this->cleanContent($content, '_');
        $sections = [];

        // Extract text blocks.
        $found = preg_match_all(
            '/(\sQ)?\s+BT[\s|\(|\[]+(.*?)\s*ET(\sq)?/s',
            $masked,
            $blocks,
            \PREG_OFFSET_CAPTURE
        );
        if ($found) {
            foreach ($blocks[2] as $index => $block) {
                $maskedBody = $block[0];
                if ('' === $maskedBody) {
                    continue;
                }

                // Same offset and length, but taken from the unmasked stream.
                $body = substr($content, $block[1], \strlen($maskedBody));

                // Removes BDC and EMC markup.
                $body = preg_replace('/(\/[A-Za-z0-9]+\s*<<.*?)(>>\s*BDC)(.*?)(EMC\s+)/s', '${3}', $body.' ');

                // Add Q and q flags if detected around BT/ET.
                // @see: https://github.com/smalot/pdfparser/issues/387
                $prefix = empty($blocks[1][$index][0]) ? '' : "Q\n";
                $suffix = empty($blocks[3][$index][0]) ? '' : "\nq";

                $sections[] = trim($prefix.$body).$suffix;
            }
        }

        // Extract 'do' commands.
        if (preg_match_all('/(\/[A-Za-z0-9\.\-_]+\s+Do)\s/s', $masked, $calls, \PREG_OFFSET_CAPTURE)) {
            foreach ($calls[1] as $call) {
                $sections[] = substr($content, $call[1], \strlen($call[0]));
            }
        }

        return $sections;
    }
248
249 11
    /**
     * Picks the font used until a Tf operator selects another one.
     *
     * The fonts of the page (when a page is given) are listed before the fonts
     * of the document; the first entry of that combined list is returned. When
     * the list is empty a new Font bound to the document is returned.
     *
     * @param Page|null $page page whose fonts take precedence
     *
     * @return Font
     */
    private function getDefaultFont(Page $page = null)
    {
        $pageFonts = null === $page ? [] : $page->getFonts();
        $candidates = array_merge($pageFonts, array_values($this->document->getFonts()));

        if (\count($candidates) <= 0) {
            return new Font($this->document);
        }

        return reset($candidates);
    }
264
265
    /**
     * Extracts the text of this object by interpreting its content stream.
     *
     * Handled operators:
     *  - Td, TD, Tm, T*, TL, Tz: insert "\n", "\t" or " " depending on the operands;
     *  - Tf: switches the current font (needs $page; unknown font ids are ignored);
     *  - q / Q: save / restore the current font;
     *  - Tj, ', TJ: decode the operand with the current font and append it;
     *  - Do: appends the text of the referenced XObject (needs $page).
     * Every other operator is ignored.
     *
     * The object id is pushed on self::$recursionStack for the duration of the
     * call so that XObjects referencing each other do not recurse forever.
     *
     * @param Page|null $page page used to resolve font and XObject names
     *
     * @return string extracted text followed by one trailing space
     *
     * @throws \Exception
     */
    public function getText(Page $page = null)
    {
        $text = '';
        $sections = $this->getSectionsText($this->content);
        $current_font = $this->getDefaultFont($page);
        $clipped_font = $current_font;

        $current_position_td = ['x' => false, 'y' => false];
        $current_position_tm = ['x' => false, 'y' => false];

        self::$recursionStack[] = $this->getUniqueId();

        // The stack is static: it must be unwound even when decoding throws,
        // otherwise this object would be treated as "in progress" forever.
        try {
            foreach ($sections as $section) {
                $commands = $this->getCommandsText($section);

                foreach ($commands as $command) {
                    switch ($command[self::OPERATOR]) {
                        // move text current point
                        case 'Td':
                            $args = preg_split('/\s/s', $command[self::COMMAND]);
                            $y = array_pop($args);
                            $x = array_pop($args);
                            if (((float) $x <= 0) ||
                                (false !== $current_position_td['y'] && (float) $y < (float) ($current_position_td['y']))
                            ) {
                                // vertical offset
                                $text .= "\n";
                            } elseif (false !== $current_position_td['x']
                                && (float) $x > (float) ($current_position_td['x'])
                            ) {
                                // horizontal offset
                                $text .= ' ';
                            }
                            $current_position_td = ['x' => $x, 'y' => $y];
                            break;

                        // move text current point and set leading
                        case 'TD':
                            $args = preg_split('/\s/s', $command[self::COMMAND]);
                            $y = array_pop($args);
                            $x = array_pop($args);
                            if ((float) $y < 0) {
                                $text .= "\n";
                            } elseif ((float) $x <= 0) {
                                $text .= ' ';
                            }
                            break;

                        case 'Tf':
                            list($id) = preg_split('/\s/s', $command[self::COMMAND]);
                            $id = trim($id, '/');
                            if (null !== $page) {
                                $new_font = $page->getFont($id);
                                // If an invalid font ID is given, do not update the font.
                                // This should theoretically never happen, as the PDF spec states for the Tf operator:
                                // "The specified font value shall match a resource name in the Font entry of the default resource dictionary"
                                // (https://www.adobe.com/content/dam/acom/en/devnet/pdf/pdfs/PDF32000_2008.pdf, page 435)
                                // But we want to make sure that malformed PDFs do not simply crash.
                                if (null !== $new_font) {
                                    $current_font = $new_font;
                                }
                            }
                            break;

                        case 'Q':
                            // Use clip: restore font.
                            $current_font = $clipped_font;
                            break;

                        case 'q':
                            // Use clip: save font.
                            $clipped_font = $current_font;
                            break;

                        case "'":
                        case 'Tj':
                            $command[self::COMMAND] = [$command];
                            // no break
                        case 'TJ':
                            $sub_text = $current_font->decodeText($command[self::COMMAND]);
                            $text .= $sub_text;
                            break;

                        // set leading
                        case 'TL':
                            $text .= ' ';
                            break;

                        case 'Tm':
                            $args = preg_split('/\s/s', $command[self::COMMAND]);
                            $y = array_pop($args);
                            $x = array_pop($args);
                            if (false !== $current_position_tm['x']) {
                                $delta = abs((float) $x - (float) ($current_position_tm['x']));
                                if ($delta > 10) {
                                    $text .= "\t";
                                }
                            }
                            if (false !== $current_position_tm['y']) {
                                $delta = abs((float) $y - (float) ($current_position_tm['y']));
                                if ($delta > 10) {
                                    $text .= "\n";
                                }
                            }
                            $current_position_tm = ['x' => $x, 'y' => $y];
                            break;

                        // set horizontal scaling
                        case 'Tz':
                        // move to start of next line
                        case 'T*':
                            $text .= "\n";
                            break;

                        case 'Do':
                            if (null !== $page) {
                                $args = preg_split('/\s/s', $command[self::COMMAND]);
                                $id = trim(array_pop($args), '/ ');
                                $xobject = $page->getXObject($id);

                                // @todo $xobject could be a ElementXRef object, which would then throw an error
                                // Hashes are compared strictly: a loose comparison treats
                                // numeric-looking strings as numbers.
                                if ($xobject instanceof self
                                    && !\in_array($xobject->getUniqueId(), self::$recursionStack, true)
                                ) {
                                    // Not a circular reference.
                                    $text .= $xobject->getText($page);
                                }
                            }
                            break;

                        // Tc, Ts, Tw, Da, rg, RG, re, co, cs, gs, en, sc, SC, g, G, V, vo, Vo
                        // and any unknown operator do not contribute to the text.
                        default:
                    }
                }
            }
        } finally {
            array_pop(self::$recursionStack);
        }

        return $text.' ';
    }
462
463
    /**
     * Extracts the text of this object as a list of decoded fragments.
     *
     * One entry is added per Tj, ' or TJ operator (decoded with the current
     * font) and per Do operator whose XObject is a PDFObject (its getText()
     * result). Positioning operators are ignored, so no separators are added.
     *
     * @param Page|null $page page used to resolve font and XObject names; when
     *                        null, Tf and Do are ignored
     *
     * @return array list of strings
     *
     * @throws \Exception
     */
    public function getTextArray(Page $page = null)
    {
        $text = [];
        $sections = $this->getSectionsText($this->content);
        $current_font = new Font($this->document);

        foreach ($sections as $section) {
            $commands = $this->getCommandsText($section);

            foreach ($commands as $command) {
                switch ($command[self::OPERATOR]) {
                    case 'Tf':
                        if (null !== $page) {
                            list($id) = preg_split('/\s/s', $command[self::COMMAND]);
                            $id = trim($id, '/');
                            $new_font = $page->getFont($id);
                            // Same rule as getText(): an unknown font id must not
                            // replace the current font with null, or the next
                            // decodeText() call would be made on null.
                            if (null !== $new_font) {
                                $current_font = $new_font;
                            }
                        }
                        break;

                    case "'":
                    case 'Tj':
                        $command[self::COMMAND] = [$command];
                        // no break
                    case 'TJ':
                        $sub_text = $current_font->decodeText($command[self::COMMAND]);
                        $text[] = $sub_text;
                        break;

                    case 'Do':
                        if (null !== $page) {
                            $args = preg_split('/\s/s', $command[self::COMMAND]);
                            $id = trim(array_pop($args), '/ ');
                            $xobject = $page->getXObject($id);
                            // Only PDFObject instances are known to provide getText().
                            if ($xobject instanceof self) {
                                $text[] = $xobject->getText($page);
                            }
                        }
                        break;

                    // Tc, Td, TD, TL, Tm, Ts, Tw, Tz, T*, Da, rg, RG, re, co, cs, gs,
                    // en, sc, SC, g, G, V, vo, Vo and any unknown operator are ignored.
                    default:
                }
            }
        }

        return $text;
    }
589
590
    /**
     * Tokenizes a section of a content stream into commands.
     *
     * Each command is an array with three keys:
     *  - self::TYPE:     '/', '[', '<', '(', 'n' or '' depending on the operand kind;
     *  - self::OPERATOR: the operator following the operand ('' when there is none);
     *  - self::COMMAND:  the operand text, or a nested command list for '[' operands.
     *
     * Parsing stops at the end of the text or at the first position where no
     * command can be read (this is how a nested call returns on ']').
     *
     * @param string $text_part text to tokenize
     * @param int    $offset    position to start from; updated to the position where parsing stopped
     *
     * @return array list of commands
     */
    public function getCommandsText($text_part, &$offset = 0)
    {
        $commands = $matches = [];
        $length = \strlen($text_part);

        while ($offset < $length) {
            $offset += strspn($text_part, "\x00\x09\x0a\x0c\x0d\x20", $offset);
            if ($offset >= $length) {
                // Only whitespace was left: nothing more to read.
                break;
            }
            $char = $text_part[$offset];

            $operator = '';
            $type = '';
            $command = false;

            switch ($char) {
                case '/':
                    $type = $char;
                    if (preg_match(
                        '/^\/([A-Z0-9\._,\+]+\s+[0-9.\-]+)\s+([A-Z]+)\s*/si',
                        substr($text_part, $offset),
                        $matches
                    )
                    ) {
                        $operator = $matches[2];
                        $command = $matches[1];
                        $offset += \strlen($matches[0]);
                    } elseif (preg_match(
                        '/^\/([A-Z0-9\._,\+]+)\s+([A-Z]+)\s*/si',
                        substr($text_part, $offset),
                        $matches
                    )
                    ) {
                        $operator = $matches[2];
                        $command = $matches[1];
                        $offset += \strlen($matches[0]);
                    }
                    break;

                case '[':
                case ']':
                    // array object
                    $type = $char;
                    ++$offset;
                    if ('[' == $char) {
                        // get elements
                        $command = $this->getCommandsText($text_part, $offset);

                        if (preg_match('/^\s*[A-Z]{1,2}\s*/si', substr($text_part, $offset), $matches)) {
                            $operator = trim($matches[0]);
                            $offset += \strlen($matches[0]);
                        }
                    }
                    // For ']' $command stays false, which ends the (nested) loop below.
                    break;

                case '<':
                case '>':
                    // hexadecimal string
                    $type = $char;
                    ++$offset;
                    if ('<' == $char) {
                        $strpos = strpos($text_part, '>', $offset);
                        if (false === $strpos) {
                            // Unterminated string: take the rest of the text. Without this,
                            // false + 1 reset $offset to 1 and the same '<' was parsed again
                            // and again.
                            $command = (string) substr($text_part, $offset);
                            $offset = $length;
                        } else {
                            $command = substr($text_part, $offset, ($strpos - $offset));
                            $offset = $strpos + 1;
                        }
                    }

                    if (preg_match('/^\s*[A-Z]{1,2}\s*/si', substr($text_part, $offset), $matches)) {
                        $operator = trim($matches[0]);
                        $offset += \strlen($matches[0]);
                    }
                    break;

                case '(':
                case ')':
                    ++$offset;
                    $type = $char;
                    $strpos = $offset;
                    if ('(' == $char) {
                        $open_bracket = 1;
                        while ($open_bracket > 0) {
                            if (!isset($text_part[$strpos])) {
                                break;
                            }
                            $ch = $text_part[$strpos];
                            switch ($ch) {
                                case '\\':
                                    // REVERSE SOLIDUS (5Ch) (Backslash)
                                    // skip next character
                                    ++$strpos;
                                    break;

                                case '(':
                                    // LEFT PARENTHESIS (28h)
                                    ++$open_bracket;
                                    break;

                                case ')':
                                    // RIGHT PARENTHESIS (29h)
                                    --$open_bracket;
                                    break;
                            }
                            ++$strpos;
                        }
                        $command = substr($text_part, $offset, ($strpos - $offset - 1));
                        $offset = $strpos;

                        if (preg_match('/^\s*([A-Z\']{1,2})\s*/si', substr($text_part, $offset), $matches)) {
                            $operator = $matches[1];
                            $offset += \strlen($matches[0]);
                        }
                    }
                    break;

                default:
                    if ('ET' == substr($text_part, $offset, 2)) {
                        break;
                    } elseif (preg_match(
                        '/^\s*(?P<data>([0-9\.\-]+\s*?)+)\s+(?P<id>[A-Z]{1,3})\s*/si',
                        substr($text_part, $offset),
                        $matches
                    )
                    ) {
                        $operator = trim($matches['id']);
                        $command = trim($matches['data']);
                        $offset += \strlen($matches[0]);
                    } elseif (preg_match('/^\s*([0-9\.\-]+\s*?)+\s*/si', substr($text_part, $offset), $matches)) {
                        $type = 'n';
                        $command = trim($matches[0]);
                        $offset += \strlen($matches[0]);
                    } elseif (preg_match('/^\s*([A-Z\*]+)\s*/si', substr($text_part, $offset), $matches)) {
                        $type = '';
                        $operator = $matches[1];
                        $command = '';
                        $offset += \strlen($matches[0]);
                    }
            }

            if (false !== $command) {
                $commands[] = [
                    self::TYPE => $type,
                    self::OPERATOR => $operator,
                    self::COMMAND => $command,
                ];
            } else {
                break;
            }
        }

        return $commands;
    }
746
747
    /**
     * Creates the object class matching the 'Type' (and 'Subtype') of a header.
     *
     * - XObject: Image or Form according to 'Subtype', PDFObject otherwise;
     * - Pages, Page, Encoding: the class of the same name;
     * - Font: the class \Smalot\PdfParser\Font\Font<Subtype> when it exists, Font otherwise;
     * - anything else: PDFObject.
     *
     * @param Document    $document document passed to the created object
     * @param Header      $header   header read for 'Type' and 'Subtype'
     * @param string      $content  content passed to the created object
     * @param Config|null $config   configuration passed to the created object
     *
     * @return PDFObject
     */
    public static function factory(
        Document $document,
        Header $header,
        $content,
        Config $config = null
    ) {
        $type = $header->get('Type')->getContent();

        switch ($type) {
            case 'XObject':
                $subtype = $header->get('Subtype')->getContent();
                if ('Image' == $subtype) {
                    return new Image($document, $header, $content, $config);
                }
                if ('Form' == $subtype) {
                    return new Form($document, $header, $content, $config);
                }
                // Other subtypes use the generic object created below.
                break;

            case 'Pages':
                return new Pages($document, $header, $content, $config);

            case 'Page':
                return new Page($document, $header, $content, $config);

            case 'Encoding':
                return new Encoding($document, $header, $content, $config);

            case 'Font':
                $classname = '\Smalot\PdfParser\Font\Font'.$header->get('Subtype')->getContent();
                if (!class_exists($classname)) {
                    return new Font($document, $header, $content, $config);
                }

                return new $classname($document, $header, $content, $config);
        }

        return new self($document, $header, $content, $config);
    }
793
794
    /**
     * Returns the id used to recognise this instance on the recursion stack.
     *
     * @return string value of spl_object_hash() for this instance
     */
    protected function getUniqueId()
    {
        $hash = spl_object_hash($this);

        return $hash;
    }
803
}
804