Passed
Push — master ( 1048d7...43e436 )
by Konrad
02:47
created

PDFObject::has()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
eloc 1
dl 0
loc 3
rs 10
c 0
b 0
f 0
ccs 2
cts 2
cp 1
cc 1
nc 1
nop 1
crap 1
1
<?php
2
3
/**
4
 * @file
5
 *          This file is part of the PdfParser library.
6
 *
7
 * @author  Sébastien MALOT <[email protected]>
8
 * @date    2017-01-03
9
 *
10
 * @license LGPLv3
11
 * @url     <https://github.com/smalot/pdfparser>
12
 *
13
 *  PdfParser is a pdf library written in PHP, extraction oriented.
14
 *  Copyright (C) 2017 - Sébastien MALOT <[email protected]>
15
 *
16
 *  This program is free software: you can redistribute it and/or modify
17
 *  it under the terms of the GNU Lesser General Public License as published by
18
 *  the Free Software Foundation, either version 3 of the License, or
19
 *  (at your option) any later version.
20
 *
21
 *  This program is distributed in the hope that it will be useful,
22
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
23
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
24
 *  GNU Lesser General Public License for more details.
25
 *
26
 *  You should have received a copy of the GNU Lesser General Public License
27
 *  along with this program.
28
 *  If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
29
 */
30
31
namespace Smalot\PdfParser;
32
33
use Smalot\PdfParser\XObject\Form;
34
use Smalot\PdfParser\XObject\Image;
35
36
/**
37
 * Class PDFObject
38
 */
39
class PDFObject
40
{
41
    const TYPE = 't';
42
43
    const OPERATOR = 'o';
44
45
    const COMMAND = 'c';
46
47
    /**
48
     * The recursion stack.
49
     *
50
     * @var array
51
     */
52
    public static $recursionStack = [];
53
54
    /**
55
     * @var Document
56
     */
57
    protected $document = null;
58
59
    /**
60
     * @var Header
61
     */
62
    protected $header = null;
63
64
    /**
65
     * @var string
66
     */
67
    protected $content = null;
68
69
    /**
70
     * @var Config
71
     */
72
    protected $config;
73
74
    /**
     * Stores the owning document, the header, the raw content and the configuration.
     *
     * @param Document    $document document this object is attached to
     * @param Header|null $header   header of the object; when null, a new Header() is created instead
     * @param string|null $content  raw content of the object, stored as given
     * @param Config|null $config   configuration, stored as given
     */
    public function __construct(
        Document $document,
        Header $header = null,
        $content = null,
        Config $config = null
    ) {
        // Make sure the object always carries a header instance.
        if (null === $header) {
            $header = new Header();
        }

        $this->document = $document;
        $this->header = $header;
        $this->content = $content;
        $this->config = $config;
    }
90
91 36
    /**
     * Initialization hook; this implementation deliberately does nothing.
     */
    public function init()
    {
        // Intentionally left blank.
    }
94
95
    /**
     * Gives access to the header held by this object.
     *
     * @return Header|null
     */
    public function getHeader()
    {
        $header = $this->header;

        return $header;
    }
102
103
    /**
     * Fetches a header entry by name; the lookup is delegated to the header.
     *
     * @param string $name name of the header entry
     *
     * @return Element|PDFObject
     */
    public function get($name)
    {
        $entry = $this->header->get($name);

        return $entry;
    }
112
113
    /**
     * Tells whether the header knows an entry with the given name;
     * the check is delegated to the header.
     *
     * @param string $name name of the header entry
     *
     * @return bool
     */
    public function has($name)
    {
        $exists = $this->header->has($name);

        return $exists;
    }
122
123
    /**
     * Returns the details reported by the header.
     *
     * @param bool $deep forwarded unchanged to the header's getDetails()
     *
     * @return array
     */
    public function getDetails($deep = true)
    {
        $details = $this->header->getDetails($deep);

        return $details;
    }
132
133
    /**
     * Returns the raw content that was handed to the constructor.
     *
     * @return string|null
     */
    public function getContent()
    {
        $content = $this->content;

        return $content;
    }
140
141
    /**
     * Masks every part of a content stream that is not an operator or a structural token.
     *
     * Each masked byte is overwritten with the mask character, so the returned string
     * has the same length as the input and offsets found in it are valid in the input.
     *
     * Masked parts, in this order: escaped backslashes and parentheses, inline image
     * blocks (BI ... ID ... EI), the inside of [...] arrays made of strings and numbers,
     * the inside of (...) strings, everything from "<" to ">" (brackets included),
     * "/Tag ... BDC" markers and "EMC" markers.
     *
     * @param string $content content stream to mask
     * @param string $char    mask character; only its first byte is used
     *
     * @return string the masked content
     */
    public function cleanContent($content, $char = 'X')
    {
        $char = $char[0];

        // Overwrites every captured [text, offset] pair of one capture group with mask
        // characters of equal length; because lengths are preserved, offsets captured
        // before the replacement remain valid. A missing group (no match or failed
        // preg_match_all) leaves the content untouched.
        $mask = function ($content, $matches, $group) use ($char) {
            if (empty($matches[$group])) {
                return $content;
            }

            foreach ($matches[$group] as $part) {
                $length = \strlen($part[0]);
                $content = substr_replace($content, str_repeat($char, $length), $part[1], $length);
            }

            return $content;
        };

        // Escaped backslashes and parentheses must not be taken for string delimiters below.
        $content = str_replace(['\\\\', '\\)', '\\('], $char.$char, $content);

        // Remove image bloc with binary content (the whole match, surrounding whitespace included)
        preg_match_all('/\s(BI\s.*?(\sID\s).*?(\sEI))\s/s', $content, $matches, \PREG_OFFSET_CAPTURE);
        $content = $mask($content, $matches, 0);

        // Clean content in square brackets [.....]
        preg_match_all('/\[((\(.*?\)|[0-9\.\-\s]*)*)\]/s', $content, $matches, \PREG_OFFSET_CAPTURE);
        $content = $mask($content, $matches, 1);

        // Clean content in round brackets (.....)
        preg_match_all('/\((.*?)\)/s', $content, $matches, \PREG_OFFSET_CAPTURE);
        $content = $mask($content, $matches, 1);

        // Clean structure: everything from an opening "<" to its closing ">" is masked,
        // brackets included; $level tracks the nesting depth.
        if ($parts = preg_split('/(<|>)/s', $content, -1, \PREG_SPLIT_NO_EMPTY | \PREG_SPLIT_DELIM_CAPTURE)) {
            $content = '';
            $level = 0;
            foreach ($parts as $part) {
                if ('<' == $part) {
                    ++$level;
                }

                $content .= (0 == $level ? $part : str_repeat($char, \strlen($part)));

                if ('>' == $part) {
                    --$level;
                }
            }
        }

        // Clean BDC and EMC markup.
        // The mask character is quoted together with the "/" delimiter; otherwise a "/"
        // mask character would terminate the pattern early and make it invalid.
        preg_match_all(
            '/(\/[A-Za-z0-9\_]*\s*'.preg_quote($char, '/').'*BDC)/s',
            $content,
            $matches,
            \PREG_OFFSET_CAPTURE
        );
        $content = $mask($content, $matches, 1);

        preg_match_all('/\s(EMC)\s/s', $content, $matches, \PREG_OFFSET_CAPTURE);
        $content = $mask($content, $matches, 1);

        return $content;
    }
202
203
    /**
     * Cuts a content stream into the sections that matter for text extraction.
     *
     * The stream is padded with one space on each side and masked with cleanContent();
     * the patterns are run on the masked copy and the offsets they yield are then used
     * to cut the sections out of the padded, unmasked stream.
     *
     * Two kinds of sections are returned: first all text blocks found between BT and ET
     * (with a leading "Q\n" / trailing "\nq" when those flags surround the block),
     * then all "/Name Do" commands.
     *
     * @param string $content
     *
     * @return array
     */
    public function getSectionsText($content)
    {
        $content = ' '.$content.' ';
        $masked = $this->cleanContent($content, '_');
        $sections = [];

        // Extract text blocks.
        $found = preg_match_all('/(\sQ)?\s+BT[\s|\(|\[]+(.*?)\s*ET(\sq)?/s', $masked, $blocks, \PREG_OFFSET_CAPTURE);
        if ($found) {
            foreach ($blocks[2] as $index => $capture) {
                list($maskedText, $start) = $capture;
                if ('' === $maskedText) {
                    continue;
                }

                $raw = substr($content, $start, \strlen($maskedText));

                // Removes BDC and EMC markup.
                $raw = preg_replace('/(\/[A-Za-z0-9]+\s*<<.*?)(>>\s*BDC)(.*?)(EMC\s+)/s', '${3}', $raw.' ');

                // Add Q and q flags if detected around BT/ET.
                // @see: https://github.com/smalot/pdfparser/issues/387
                $prefix = empty($blocks[1][$index][0]) ? '' : "Q\n";
                $suffix = empty($blocks[3][$index][0]) ? '' : "\nq";

                $sections[] = trim($prefix.$raw).$suffix;
            }
        }

        // Extract 'do' commands.
        if (preg_match_all('/(\/[A-Za-z0-9\.\-_]+\s+Do)\s/s', $masked, $invocations, \PREG_OFFSET_CAPTURE)) {
            foreach ($invocations[1] as $capture) {
                $sections[] = substr($content, $capture[1], \strlen($capture[0]));
            }
        }

        return $sections;
    }
248
249 13
    /**
     * Picks the font used until a Tf command selects another one.
     *
     * The fonts of the page (if a page is given) are looked at first, followed by the
     * fonts of the document; the first one wins. When neither offers a font,
     * a new Font object without header and content is created.
     *
     * @param Page|null $page
     *
     * @return Font
     */
    private function getDefaultFont(Page $page = null)
    {
        $pageFonts = null === $page ? [] : $page->getFonts();
        $candidates = array_merge($pageFonts, array_values($this->document->getFonts()));

        if (0 === \count($candidates)) {
            return new Font($this->document, null, null, $this->config);
        }

        return reset($candidates);
    }
264
265
    /**
     * Extracts the text of this object's content as one string.
     *
     * The content is cut into sections with getSectionsText(), each section is parsed
     * with getCommandsText(), and the resulting commands are interpreted one by one:
     * text-showing operators are decoded with the current font, positioning operators
     * add line breaks, spaces or tabs, and "Do" appends the text of the referenced
     * XObject of the page.
     *
     * While the object is being processed its unique id sits on self::$recursionStack,
     * so that an XObject which is already being processed is not entered again.
     *
     * @param Page|null $page page used to look up fonts and XObjects; without a page,
     *                        Tf does not change the font and Do is ignored
     *
     * @return string the extracted text followed by a single space
     *
     * @throws \Exception
     */
    public function getText(Page $page = null)
    {
        $result = '';
        $sections = $this->getSectionsText($this->content);
        $current_font = $this->getDefaultFont($page);
        $clipped_font = $current_font;

        $current_position_td = ['x' => false, 'y' => false];
        $current_position_tm = ['x' => false, 'y' => false];

        self::$recursionStack[] = $this->getUniqueId();

        // The stack is static: it has to be unwound even when decoding throws,
        // otherwise later calls would treat this object as still being processed.
        try {
            foreach ($sections as $section) {
                $commands = $this->getCommandsText($section);
                $reverse_text = false;
                $text = '';

                foreach ($commands as $command) {
                    // Operators without effect on the extracted text are not listed:
                    // Tc, Ts, Tw, Da, rg, RG, re, co, cs, gs, en, sc, SC, g, G, V, vo, Vo.
                    switch ($command[self::OPERATOR]) {
                        case 'BMC':
                            if ('ReversedChars' === $command[self::COMMAND]) {
                                $reverse_text = true;
                            }
                            break;

                        // move text current point
                        case 'Td':
                            $args = preg_split('/\s/s', $command[self::COMMAND]);
                            $y = array_pop($args);
                            $x = array_pop($args);
                            if (((float) $x <= 0) ||
                                (false !== $current_position_td['y'] && (float) $y < (float) ($current_position_td['y']))
                            ) {
                                // vertical offset
                                $text .= "\n";
                            } elseif (false !== $current_position_td['x']
                                && (float) $x > (float) ($current_position_td['x'])
                            ) {
                                // horizontal offset
                                $text .= ' ';
                            }
                            $current_position_td = ['x' => $x, 'y' => $y];
                            break;

                        // move text current point and set leading
                        case 'TD':
                            $args = preg_split('/\s/s', $command[self::COMMAND]);
                            $y = array_pop($args);
                            $x = array_pop($args);
                            if ((float) $y < 0) {
                                $text .= "\n";
                            } elseif ((float) $x <= 0) {
                                $text .= ' ';
                            }
                            break;

                        case 'Tf':
                            list($id) = preg_split('/\s/s', $command[self::COMMAND]);
                            $id = trim($id, '/');
                            if (null !== $page) {
                                $new_font = $page->getFont($id);
                                // If an invalid font ID is given, do not update the font.
                                // This should theoretically never happen, as the PDF spec states for the Tf operator:
                                // "The specified font value shall match a resource name in the Font entry of the default resource dictionary"
                                // (https://www.adobe.com/content/dam/acom/en/devnet/pdf/pdfs/PDF32000_2008.pdf, page 435)
                                // But we want to make sure that malformed PDFs do not simply crash.
                                if (null !== $new_font) {
                                    $current_font = $new_font;
                                }
                            }
                            break;

                        case 'Q':
                            // Use clip: restore font.
                            $current_font = $clipped_font;
                            break;

                        case 'q':
                            // Use clip: save font.
                            $clipped_font = $current_font;
                            break;

                        case "'":
                        case 'Tj':
                            // Wrap the single command so that it can be decoded like a TJ array.
                            $command[self::COMMAND] = [$command];
                            // no break
                        case 'TJ':
                            $sub_text = $current_font->decodeText($command[self::COMMAND]);
                            $text .= $sub_text;
                            break;

                        // set leading
                        case 'TL':
                            $text .= ' ';
                            break;

                        case 'Tm':
                            $args = preg_split('/\s/s', $command[self::COMMAND]);
                            $y = array_pop($args);
                            $x = array_pop($args);
                            if (false !== $current_position_tm['x']) {
                                $delta = abs((float) $x - (float) ($current_position_tm['x']));
                                if ($delta > 10) {
                                    $text .= "\t";
                                }
                            }
                            if (false !== $current_position_tm['y']) {
                                $delta = abs((float) $y - (float) ($current_position_tm['y']));
                                if ($delta > 10) {
                                    $text .= "\n";
                                }
                            }
                            $current_position_tm = ['x' => $x, 'y' => $y];
                            break;

                        // set horizontal scaling
                        case 'Tz':
                            $text .= "\n";
                            break;

                        // move to start of next line
                        case 'T*':
                            $text .= "\n";
                            break;

                        case 'Do':
                            if (null !== $page) {
                                $args = preg_split('/\s/s', $command[self::COMMAND]);
                                $id = trim(array_pop($args), '/ ');
                                $xobject = $page->getXObject($id);

                                // @todo $xobject could be a ElementXRef object, which would then throw an error
                                // The ids are compared strictly: they are strings, and a loose
                                // comparison treats two different numeric-looking strings as equal.
                                if ($xobject instanceof self
                                    && !\in_array($xobject->getUniqueId(), self::$recursionStack, true)
                                ) {
                                    // Not a circular reference.
                                    $text .= $xobject->getText($page);
                                }
                            }
                            break;

                        default:
                    }
                }

                // Fix Hebrew and other reverse text oriented languages.
                // @see: https://github.com/smalot/pdfparser/issues/398
                if ($reverse_text) {
                    $chars = mb_str_split($text, 1, mb_internal_encoding());
                    $text = implode('', array_reverse($chars));
                }

                $result .= $text;
            }
        } finally {
            array_pop(self::$recursionStack);
        }

        return $result.' ';
    }
479
480
    /**
     * Extracts the text of this object's content as a list of decoded text pieces.
     *
     * The content is cut into sections with getSectionsText() and parsed with
     * getCommandsText(). Every text-showing command (', Tj, TJ) contributes one entry
     * decoded with the current font; every "Do" command contributes the text of the
     * referenced XObject of the page. Positioning operators add nothing.
     *
     * @param Page|null $page page used to look up fonts and XObjects; without a page,
     *                        Tf does not change the font and Do is ignored
     *
     * @return array
     *
     * @throws \Exception
     */
    public function getTextArray(Page $page = null)
    {
        $text = [];
        $sections = $this->getSectionsText($this->content);
        $current_font = new Font($this->document, null, null, $this->config);

        foreach ($sections as $section) {
            $commands = $this->getCommandsText($section);

            foreach ($commands as $command) {
                // Operators without effect on the result are not listed:
                // Tc, Td, TD, TL, Tm, Ts, Tw, Tz, T*, Da, rg, RG, re, co, cs, gs, en,
                // sc, SC, g, G, V, vo, Vo.
                switch ($command[self::OPERATOR]) {
                    case 'Tf':
                        if (null !== $page) {
                            list($id) = preg_split('/\s/s', $command[self::COMMAND]);
                            $id = trim($id, '/');
                            $new_font = $page->getFont($id);
                            // Keep the current font when the id is unknown, as getText() does;
                            // otherwise the next text command would call decodeText() on null.
                            if (null !== $new_font) {
                                $current_font = $new_font;
                            }
                        }
                        break;

                    case "'":
                    case 'Tj':
                        // Wrap the single command so that it can be decoded like a TJ array.
                        $command[self::COMMAND] = [$command];
                        // no break
                    case 'TJ':
                        $sub_text = $current_font->decodeText($command[self::COMMAND]);
                        $text[] = $sub_text;
                        break;

                    case 'Do':
                        if (null !== $page) {
                            $args = preg_split('/\s/s', $command[self::COMMAND]);
                            $id = trim(array_pop($args), '/ ');
                            $xobject = $page->getXObject($id);
                            // Only objects of this class hierarchy are asked for their text,
                            // as in getText(); anything else is skipped.
                            if ($xobject instanceof self) {
                                $text[] = $xobject->getText($page);
                            }
                        }
                        break;

                    default:
                }
            }
        }

        return $text;
    }
606
607
    /**
     * Parses a piece of a content stream into a list of commands.
     *
     * Each command is an array with three entries: self::TYPE (the opening character of
     * the operand, 'n' for a bare number, or an empty string), self::OPERATOR (the
     * operator that follows the operand, possibly empty) and self::COMMAND (the operand:
     * a string, or a nested command list for "[...]" arrays).
     *
     * Parsing stops at the end of the text, at "ET", at a closing "]" (which ends a
     * nested call) or at the first position where nothing can be recognized.
     *
     * @param string $text_part text to parse
     * @param int    $offset    position to start at; passed by reference and moved
     *                          forward past everything that was consumed
     *
     * @return array
     */
    public function getCommandsText($text_part, &$offset = 0)
    {
        $commands = $matches = [];
        $length = \strlen($text_part);

        while ($offset < $length) {
            $offset += strspn($text_part, "\x00\x09\x0a\x0c\x0d\x20", $offset);
            if ($offset >= $length) {
                // Only white-space was left; reading $text_part[$offset] would be out of range.
                break;
            }
            $char = $text_part[$offset];

            $operator = '';
            $type = '';
            $command = false;

            switch ($char) {
                case '/':
                    // name, optionally followed by a number, then an operator
                    $type = $char;
                    if (preg_match(
                        '/^\/([A-Z0-9\._,\+]+\s+[0-9.\-]+)\s+([A-Z]+)\s*/si',
                        substr($text_part, $offset),
                        $matches
                    )
                    ) {
                        $operator = $matches[2];
                        $command = $matches[1];
                        $offset += \strlen($matches[0]);
                    } elseif (preg_match(
                        '/^\/([A-Z0-9\._,\+]+)\s+([A-Z]+)\s*/si',
                        substr($text_part, $offset),
                        $matches
                    )
                    ) {
                        $operator = $matches[2];
                        $command = $matches[1];
                        $offset += \strlen($matches[0]);
                    }
                    break;

                case '[':
                case ']':
                    // array object
                    $type = $char;
                    ++$offset;
                    if ('[' == $char) {
                        // get elements; the nested call returns once it has consumed the closing "]"
                        $command = $this->getCommandsText($text_part, $offset);

                        if (preg_match('/^\s*[A-Z]{1,2}\s*/si', substr($text_part, $offset), $matches)) {
                            $operator = trim($matches[0]);
                            $offset += \strlen($matches[0]);
                        }
                    }
                    // For "]" $command stays false, which ends the current (nested) parse below.
                    break;

                case '<':
                case '>':
                    // hexadecimal string
                    $type = $char;
                    ++$offset;
                    if ('<' == $char) {
                        $strpos = strpos($text_part, '>', $offset);
                        if (false === $strpos) {
                            // Unterminated string: take the rest of the text. Using the false
                            // result in arithmetic would move $offset back to 1 and could
                            // parse the same "<" forever.
                            $command = substr($text_part, $offset);
                            $offset = $length;
                        } else {
                            $command = substr($text_part, $offset, ($strpos - $offset));
                            $offset = $strpos + 1;
                        }
                    }

                    if (preg_match('/^\s*[A-Z]{1,2}\s*/si', substr($text_part, $offset), $matches)) {
                        $operator = trim($matches[0]);
                        $offset += \strlen($matches[0]);
                    }
                    break;

                case '(':
                case ')':
                    // literal string
                    ++$offset;
                    $type = $char;
                    $strpos = $offset;
                    if ('(' == $char) {
                        // Walk to the matching closing parenthesis, honouring nesting and escapes.
                        $open_bracket = 1;
                        while ($open_bracket > 0) {
                            if (!isset($text_part[$strpos])) {
                                break;
                            }
                            $ch = $text_part[$strpos];
                            switch ($ch) {
                                case '\\':
                                    // REVERSE SOLIDUS (5Ch) (Backslash)
                                    // skip next character
                                    ++$strpos;
                                    break;

                                case '(':
                                    // LEFT PARENTHESIS (28h)
                                    ++$open_bracket;
                                    break;

                                case ')':
                                    // RIGHT PARENTHESIS (29h)
                                    --$open_bracket;
                                    break;
                            }
                            ++$strpos;
                        }
                        $command = substr($text_part, $offset, ($strpos - $offset - 1));
                        $offset = $strpos;

                        if (preg_match('/^\s*([A-Z\']{1,2})\s*/si', substr($text_part, $offset), $matches)) {
                            $operator = $matches[1];
                            $offset += \strlen($matches[0]);
                        }
                    }
                    break;

                default:
                    if ('ET' == substr($text_part, $offset, 2)) {
                        // end of the text block: $command stays false and parsing stops
                        break;
                    } elseif (preg_match(
                        '/^\s*(?P<data>([0-9\.\-]+\s*?)+)\s+(?P<id>[A-Z]{1,3})\s*/si',
                        substr($text_part, $offset),
                        $matches
                    )
                    ) {
                        // numbers followed by an operator
                        $operator = trim($matches['id']);
                        $command = trim($matches['data']);
                        $offset += \strlen($matches[0]);
                    } elseif (preg_match('/^\s*([0-9\.\-]+\s*?)+\s*/si', substr($text_part, $offset), $matches)) {
                        // numbers without operator
                        $type = 'n';
                        $command = trim($matches[0]);
                        $offset += \strlen($matches[0]);
                    } elseif (preg_match('/^\s*([A-Z\*]+)\s*/si', substr($text_part, $offset), $matches)) {
                        // operator without operand
                        $type = '';
                        $operator = $matches[1];
                        $command = '';
                        $offset += \strlen($matches[0]);
                    }
            }

            if (false === $command) {
                break;
            }

            $commands[] = [
                self::TYPE => $type,
                self::OPERATOR => $operator,
                self::COMMAND => $command,
            ];
        }

        return $commands;
    }
763
764
    /**
     * Creates the object class that matches the "Type" (and "Subtype") of a header.
     *
     * - XObject / Image: Image; the content is dropped when the configuration says
     *   that image content is not to be retained
     * - XObject / Form: Form
     * - Pages, Page, Encoding: the class of the same name
     * - Font: the class "\Smalot\PdfParser\Font\Font<Subtype>" when it exists, Font otherwise
     * - anything else: a plain PDFObject
     *
     * @param Document    $document document the new object belongs to
     * @param Header      $header   header that decides which class is created
     * @param string      $content  raw content of the object
     * @param Config|null $config   configuration handed to the new object
     *
     * @return PDFObject
     */
    public static function factory(
        Document $document,
        Header $header,
        $content,
        Config $config = null
    ) {
        switch ($header->get('Type')->getContent()) {
            case 'XObject':
                switch ($header->get('Subtype')->getContent()) {
                    case 'Image':
                        // $config is optional: without one there is nothing that asks for the
                        // content to be dropped, so it is kept like in every other branch.
                        // NOTE(review): confirm that keeping the content is the wanted default.
                        $retain_content = null === $config || $config->getRetainImageContent();

                        return new Image($document, $header, $retain_content ? $content : null, $config);

                    case 'Form':
                        return new Form($document, $header, $content, $config);
                }

                return new self($document, $header, $content, $config);

            case 'Pages':
                return new Pages($document, $header, $content, $config);

            case 'Page':
                return new Page($document, $header, $content, $config);

            case 'Encoding':
                return new Encoding($document, $header, $content, $config);

            case 'Font':
                $subtype = $header->get('Subtype')->getContent();
                $classname = '\Smalot\PdfParser\Font\Font'.$subtype;

                if (class_exists($classname)) {
                    return new $classname($document, $header, $content, $config);
                }

                return new Font($document, $header, $content, $config);

            default:
                return new self($document, $header, $content, $config);
        }
    }
810
811
    /**
     * Builds the id under which this object is tracked, e.g. on the recursion stack.
     * The id is the object hash reported by spl_object_hash().
     *
     * @return string
     */
    protected function getUniqueId()
    {
        $hash = spl_object_hash($this);

        return $hash;
    }
820
}
821