Passed
Pull Request — master (#544)
by Konrad
07:43 queued 05:08
created

PDFObject::addQAndqFlagsAndTfCommands()   B

Complexity

Conditions 8
Paths 4

Size

Total Lines 27
Code Lines 18

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 19
CRAP Score 8

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 8
eloc 18
c 1
b 0
f 0
nc 4
nop 3
dl 0
loc 27
ccs 19
cts 19
cp 1
crap 8
rs 8.4444
1
<?php
2
3
/**
4
 * @file
5
 *          This file is part of the PdfParser library.
6
 *
7
 * @author  Sébastien MALOT <[email protected]>
8
 * @date    2017-01-03
9
 *
10
 * @license LGPLv3
11
 * @url     <https://github.com/smalot/pdfparser>
12
 *
13
 *  PdfParser is a pdf library written in PHP, extraction oriented.
14
 *  Copyright (C) 2017 - Sébastien MALOT <[email protected]>
15
 *
16
 *  This program is free software: you can redistribute it and/or modify
17
 *  it under the terms of the GNU Lesser General Public License as published by
18
 *  the Free Software Foundation, either version 3 of the License, or
19
 *  (at your option) any later version.
20
 *
21
 *  This program is distributed in the hope that it will be useful,
22
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
23
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
24
 *  GNU Lesser General Public License for more details.
25
 *
26
 *  You should have received a copy of the GNU Lesser General Public License
27
 *  along with this program.
28
 *  If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
29
 */
30
31
namespace Smalot\PdfParser;
32
33
use Smalot\PdfParser\XObject\Form;
34
use Smalot\PdfParser\XObject\Image;
35
36
/**
37
 * Class PDFObject
38
 */
39
class PDFObject
40
{
41
    const TYPE = 't';
42
43
    const OPERATOR = 'o';
44
45
    const COMMAND = 'c';
46
47
    /**
48
     * The recursion stack.
49
     *
50
     * @var array
51
     */
52
    public static $recursionStack = [];
53
54
    /**
55
     * @var Document
56
     */
57
    protected $document = null;
58
59
    /**
60
     * @var Header
61
     */
62
    protected $header = null;
63
64
    /**
65
     * @var string
66
     */
67
    protected $content = null;
68
69
    /**
70
     * @var Config
71
     */
72
    protected $config;
73
74 60
    /**
     * Stores the owning document together with the optional header, raw content and configuration.
     *
     * @param Document    $document document stored on the object
     * @param Header|null $header   header to use; a new empty Header is created when null
     * @param string|null $content  raw content of the object, stored as given
     * @param Config|null $config   configuration, stored as given (may stay null)
     */
    public function __construct(
75
        Document $document,
76
        ?Header $header = null,
77
        ?string $content = null,
78
        ?Config $config = null
79
    ) {
80 60
        $this->document = $document;
81 60
        // Guarantees $this->header is always a Header instance, so get()/has()/getDetails() can delegate safely.
        $this->header = $header ?? new Header();
82 60
        $this->content = $content;
83 60
        $this->config = $config;
84 60
    }
85
86 47
    /**
     * Intentionally empty in this class.
     *
     * NOTE(review): presumably an initialisation hook that subclasses override — confirm against callers.
     */
    public function init()
87
    {
88 47
    }
89
90 3
    /**
     * Returns the document that was passed to the constructor.
     */
    public function getDocument(): Document
91
    {
92 3
        return $this->document;
93
    }
94
95 47
    /**
     * Returns the header of this object.
     *
     * The constructor replaces a null header with a new Header, so in this class the value is
     * a Header instance unless a subclass resets the property.
     */
    public function getHeader(): ?Header
96
    {
97 47
        return $this->header;
98
    }
99
100 3
    /**
     * Returns the configuration given to the constructor, or null when none was provided.
     */
    public function getConfig(): ?Config
101
    {
102 3
        return $this->config;
103
    }
104
105
    /**
106
     * @return Element|PDFObject|Header
107
     */
108 48
    public function get(string $name)
109
    {
110 48
        // Pure delegation: the lookup of $name is performed by the header.
        return $this->header->get($name);
111
    }
112
113 45
    /**
     * Tells whether the header reports an entry called $name (delegates to Header::has()).
     */
    public function has(string $name): bool
114
    {
115 45
        return $this->header->has($name);
116
    }
117
118 3
    /**
     * Returns the header details as an array (delegates to Header::getDetails()).
     *
     * @param bool $deep forwarded unchanged to the header
     */
    public function getDetails(bool $deep = true): array
119
    {
120 3
        return $this->header->getDetails($deep);
121
    }
122
123 37
    /**
     * Returns the raw content given to the constructor, or null when none was provided.
     */
    public function getContent(): ?string
124
    {
125 37
        return $this->content;
126
    }
127
128 30
    /**
     * Masks every part of a content stream that is data rather than operators.
     *
     * Escaped backslashes/parentheses, inline images (BI ... ID ... EI), the contents of
     * [...] arrays and (...) strings, everything between < and >, BDC markers and EMC
     * markers are overwritten with $char. Every replacement has the same byte length as
     * the text it replaces, so byte offsets in the result are valid for the original string.
     *
     * @param string $content content stream to mask
     * @param string $char    mask character; only its first byte is used, 'X' when empty
     *
     * @return string masked content with the same byte length as $content
     */
    public function cleanContent(string $content, string $char = 'X')
    {
        // An empty mask would turn every replacement into a deletion and shift all offsets.
        $char = '' === $char ? 'X' : $char[0];
        $content = str_replace(['\\\\', '\\)', '\\('], $char.$char, $content);

        // Overwrites each captured [text, offset] pair with a run of the mask of equal length.
        $mask = static function (string $subject, array $captures) use ($char): string {
            foreach ($captures as $capture) {
                $length = \strlen($capture[0]);
                $subject = substr_replace($subject, str_repeat($char, $length), $capture[1], $length);
            }

            return $subject;
        };

        // Remove image bloc with binary content
        preg_match_all('/\s(BI\s.*?(\sID\s).*?(\sEI))\s/s', $content, $matches, \PREG_OFFSET_CAPTURE);
        $content = $mask($content, $matches[0] ?? []);

        // Clean content in square brackets [.....]
        preg_match_all('/\[((\(.*?\)|[0-9\.\-\s]*)*)\]/s', $content, $matches, \PREG_OFFSET_CAPTURE);
        $content = $mask($content, $matches[1] ?? []);

        // Clean content in round brackets (.....)
        preg_match_all('/\((.*?)\)/s', $content, $matches, \PREG_OFFSET_CAPTURE);
        $content = $mask($content, $matches[1] ?? []);

        // Clean structure: everything between a '<' and its closing '>' is masked, delimiters included.
        if ($parts = preg_split('/(<|>)/s', $content, -1, \PREG_SPLIT_NO_EMPTY | \PREG_SPLIT_DELIM_CAPTURE)) {
            $content = '';
            $level = 0;
            foreach ($parts as $part) {
                if ('<' == $part) {
                    ++$level;
                }

                $content .= (0 == $level ? $part : str_repeat($char, \strlen($part)));

                if ('>' == $part) {
                    --$level;
                }
            }
        }

        // Clean BDC and EMC markup. The delimiter is passed to preg_quote() so that a '/'
        // mask character cannot terminate the pattern early.
        preg_match_all(
            '/(\/[A-Za-z0-9\_]*\s*'.preg_quote($char, '/').'*BDC)/s',
            $content,
            $matches,
            \PREG_OFFSET_CAPTURE
        );
        $content = $mask($content, $matches[1] ?? []);

        preg_match_all('/\s(EMC)\s/s', $content, $matches, \PREG_OFFSET_CAPTURE);
        $content = $mask($content, $matches[1] ?? []);

        return $content;
    }
186
187
    /**
     * Add Q & q flags and Tf commands which before text block.
     *
     * The text that precedes a BT block ($matches[1][$pos][0]) is scanned for stand-alone
     * q / Q operators. Each one is prepended to $section on its own line, and any chunk of
     * text around them that contains a Tf command is prepended as well. The original order
     * is preserved because the prefix is walked from its last q/Q back to its first.
     *
     * @see: https://github.com/smalot/pdfparser/issues/387
     * @see: https://github.com/smalot/pdfparser/issues/542
     *
     * @param string $section text block to prefix
     * @param array  $matches preg_match_all() result captured with PREG_OFFSET_CAPTURE; group 1 holds the prefixes
     * @param int    $pos     index of the text block inside $matches
     *
     * @return string the section with the collected q/Q/Tf lines in front of it
     */
    private function addQAndqFlagsAndTfCommands(string $section, $matches, int $pos): string
    {
        $prefix = $matches[1][$pos][0];

        if (preg_match_all('/(?:\s|^)([Qq])(?:\s|$)/', $prefix, $qMatches, \PREG_OFFSET_CAPTURE)) {
            // End of the chunk that follows the q/Q currently being handled.
            $chunkEnd = \strlen($prefix);
            for ($i = \count($qMatches[0]) - 1; $i >= 0; --$i) {
                $matchStart = $qMatches[0][$i][1];
                // A match anchored by ^ or $ is shorter than three bytes, so the real length
                // of the match is used; a fixed width would swallow the next command's first byte.
                $chunkStart = $matchStart + \strlen($qMatches[0][$i][0]);
                $str = (string) substr($prefix, $chunkStart, max(0, $chunkEnd - $chunkStart));
                $chunkEnd = $matchStart;
                if (preg_match('/\sTf(\s|$)/', $str)) {
                    $section = trim($str)."\n".$section;
                }

                $flag = $qMatches[1][$i][0];
                if ('Q' === $flag) {
                    $section = "Q\n".$section;
                } elseif ('q' === $flag) {
                    $section = "q\n".$section;
                }
            }

            // Whatever stands before the first q/Q may still carry a Tf command.
            $str = (string) substr($prefix, 0, $qMatches[0][0][1]);
            if (preg_match('/\sTf(\s|$)/', $str)) {
                $section = trim($str)."\n".$section;
            }
        } elseif (preg_match('/\sTf(\s|$)/', $prefix)) {
            $section = trim($prefix)."\n".$section;
        }

        return $section;
    }
221
222 29
    /**
     * Splits a content stream into the pieces that matter for text extraction.
     *
     * Returns first every non-empty BT ... ET text block (with BDC/EMC markup stripped and
     * the q/Q/Tf commands found in front of it prepended), then every "/Name Do" command.
     * Matching runs on a masked copy of the content; the sections themselves are cut from
     * the unmasked text at the same byte offsets.
     *
     * @param string|null $content content stream; null is treated like an empty string
     *
     * @return string[] text sections followed by Do commands
     */
    public function getSectionsText(?string $content): array
    {
        // Pad both ends so that patterns requiring surrounding whitespace also match at the edges.
        $content = ' '.$content.' ';
        $textCleaned = $this->cleanContent($content, '_');
        $sections = [];

        // Extract text blocks.
        $hasTextBlocks = preg_match_all(
            '/(.*?)\s+BT[\s|\(|\[]+(.*?)\s*ET(?=\s|$)?/s',
            $textCleaned,
            $matches,
            \PREG_OFFSET_CAPTURE
        );
        if ($hasTextBlocks) {
            foreach ($matches[2] as $pos => [$masked, $start]) {
                if ('' === $masked) {
                    continue;
                }

                // Removes BDC and EMC markup.
                $raw = substr($content, $start, \strlen($masked));
                $section = trim(preg_replace('/(\/[A-Za-z0-9]+\s*<<.*?)(>>\s*BDC)(.*?)(EMC\s+)/s', '${3}', $raw.' '));

                // Add Q & q flags and Tf commands which before text block.
                if (!empty($matches[1][$pos][0])) {
                    $section = $this->addQAndqFlagsAndTfCommands($section, $matches, $pos);
                }

                $sections[] = $section;
            }
        }

        // Extract 'do' commands.
        $hasDoCommands = preg_match_all('/(\/[A-Za-z0-9\.\-_]+\s+Do)\s/s', $textCleaned, $matches, \PREG_OFFSET_CAPTURE);
        if ($hasDoCommands) {
            foreach ($matches[1] as [$masked, $start]) {
                $sections[] = substr($content, $start, \strlen($masked));
            }
        }

        return $sections;
    }
263
264 18
    /**
     * Picks the font used until the content stream selects one with Tf.
     *
     * Candidates are the fonts of $page (when given) followed by the document's first
     * font; the first candidate wins. When there is none, a blank Font is created.
     *
     * @param Page|null $page page whose fonts are preferred, if any
     */
    private function getDefaultFont(?Page $page = null): Font
    {
        // Explicit ?Page: implicit nullability through a null default is deprecated since PHP 8.4.
        $fonts = null !== $page ? $page->getFonts() : [];

        $firstFont = $this->document->getFirstFont();
        if (null !== $firstFont) {
            $fonts[] = $firstFont;
        }

        if (\count($fonts) > 0) {
            return reset($fonts);
        }

        return new Font($this->document, null, null, $this->config);
    }
282
283
    /**
     * Extracts the text of this object's content stream as a single string.
     *
     * Every section returned by getSectionsText() is parsed into commands. Text-showing
     * operators (', Tj, TJ) are decoded with the current font, positioning operators add
     * line breaks / tabs / horizontal offsets, and Do recurses into the referenced XObject.
     * The object registers itself in self::$recursionStack for the duration of the call so
     * that an XObject referencing itself (directly or indirectly) is rendered only once.
     *
     * @param Page|null $page page used to resolve font and XObject ids; without it the
     *                        Tf and Do operators are ignored
     *
     * @return string the extracted text followed by one trailing space
     *
     * @throws \Exception
     */
    public function getText(?Page $page = null): string
    {
        $result = '';
        $sections = $this->getSectionsText($this->content);
        $current_font = $this->getDefaultFont($page);
        $clipped_font = $current_font;

        $current_position_td = ['x' => false, 'y' => false];
        $current_position_tm = ['x' => false, 'y' => false];

        self::$recursionStack[] = $this->getUniqueId();

        try {
            foreach ($sections as $section) {
                $commands = $this->getCommandsText($section);
                $reverse_text = false;
                $text = '';

                foreach ($commands as $command) {
                    switch ($command[self::OPERATOR]) {
                        case 'BMC':
                            if ('ReversedChars' === $command[self::COMMAND]) {
                                $reverse_text = true;
                            }
                            break;

                        // move text current point
                        case 'Td':
                            $args = preg_split('/\s/s', $command[self::COMMAND]);
                            $y = array_pop($args);
                            $x = array_pop($args);
                            if (((float) $x <= 0)
                                || (false !== $current_position_td['y'] && (float) $y < (float) $current_position_td['y'])
                            ) {
                                // vertical offset
                                $text .= "\n";
                            } elseif (false !== $current_position_td['x'] && (float) $x > (float) $current_position_td['x']) {
                                // The config is optional; fall back to a single space, the separator
                                // already used for TD, instead of calling a method on null.
                                $text .= null !== $this->config ? $this->config->getHorizontalOffset() : ' ';
                            }
                            $current_position_td = ['x' => $x, 'y' => $y];
                            break;

                        // move text current point and set leading
                        case 'TD':
                            $args = preg_split('/\s/s', $command[self::COMMAND]);
                            $y = array_pop($args);
                            $x = array_pop($args);
                            if ((float) $y < 0) {
                                $text .= "\n";
                            } elseif ((float) $x <= 0) {
                                $text .= ' ';
                            }
                            break;

                        case 'Tf':
                            list($id) = preg_split('/\s/s', $command[self::COMMAND]);
                            $id = trim($id, '/');
                            if (null !== $page) {
                                $new_font = $page->getFont($id);
                                // If an invalid font ID is given, do not update the font.
                                // This should theoretically never happen, as the PDF spec states for the Tf operator:
                                // "The specified font value shall match a resource name in the Font entry of the default resource dictionary"
                                // (https://www.adobe.com/content/dam/acom/en/devnet/pdf/pdfs/PDF32000_2008.pdf, page 435)
                                // But we want to make sure that malformed PDFs do not simply crash.
                                if (null !== $new_font) {
                                    $current_font = $new_font;
                                }
                            }
                            break;

                        case 'Q':
                            // Use clip: restore font.
                            $current_font = $clipped_font;
                            break;

                        case 'q':
                            // Use clip: save font.
                            $clipped_font = $current_font;
                            break;

                        case "'":
                        case 'Tj':
                            // Wrap the single string so it can be decoded like a TJ array.
                            $command[self::COMMAND] = [$command];
                            // no break
                        case 'TJ':
                            $text .= $current_font->decodeText($command[self::COMMAND]);
                            break;

                        // set leading
                        case 'TL':
                            $text .= ' ';
                            break;

                        case 'Tm':
                            $args = preg_split('/\s/s', $command[self::COMMAND]);
                            $y = array_pop($args);
                            $x = array_pop($args);
                            if (false !== $current_position_tm['x']) {
                                $delta = abs((float) $x - (float) $current_position_tm['x']);
                                if ($delta > 10) {
                                    $text .= "\t";
                                }
                            }
                            if (false !== $current_position_tm['y']) {
                                $delta = abs((float) $y - (float) $current_position_tm['y']);
                                if ($delta > 10) {
                                    $text .= "\n";
                                }
                            }
                            $current_position_tm = ['x' => $x, 'y' => $y];
                            break;

                        // set horizontal scaling (emits a line break, as this method always has)
                        case 'Tz':
                        // move to start of next line
                        case 'T*':
                            $text .= "\n";
                            break;

                        case 'Do':
                            if (null !== $page) {
                                $args = preg_split('/\s/s', $command[self::COMMAND]);
                                $id = trim(array_pop($args), '/ ');
                                $xobject = $page->getXObject($id);

                                // @todo $xobject could be a ElementXRef object, which would then throw an error
                                // Strict comparison: object hashes are hex strings and some of them
                                // look numeric, which makes loose comparison unreliable.
                                if ($xobject instanceof self
                                    && !\in_array($xobject->getUniqueId(), self::$recursionStack, true)
                                ) {
                                    // Not a circular reference.
                                    $text .= $xobject->getText($page);
                                }
                            }
                            break;

                        // Recognised operators that do not contribute to the extracted text.
                        case 'Tc': // set character spacing
                        case 'Ts': // set super/subscripting text rise
                        case 'Tw': // set word spacing
                        case 'Da':
                        case 'rg':
                        case 'RG':
                        case 're':
                        case 'co':
                        case 'cs':
                        case 'gs':
                        case 'en':
                        case 'sc':
                        case 'SC':
                        case 'g':
                        case 'G':
                        case 'V':
                        case 'vo':
                        case 'Vo':
                            break;

                        default:
                    }
                }

                // Fix Hebrew and other reverse text oriented languages.
                // @see: https://github.com/smalot/pdfparser/issues/398
                if ($reverse_text) {
                    $chars = mb_str_split($text, 1, mb_internal_encoding());
                    $text = implode('', array_reverse($chars));
                }

                $result .= $text;
            }
        } finally {
            // Leave the stack as it was found, even when decoding throws: otherwise the static
            // stack grows without bound and stale ids suppress later, legitimate renderings.
            array_pop(self::$recursionStack);
        }

        return $result.' ';
    }
490
491
    /**
     * Extracts the text of this object's content stream as a list of decoded fragments.
     *
     * One entry is produced per text-showing operator (', Tj, TJ), decoded with the font
     * selected by the latest valid Tf, and one per Do operator whose XObject can provide
     * text. All other operators (Tc, Td, TD, TL, Tm, Ts, Tw, Tz, T*, Da, rg, RG, re, co,
     * cs, gs, en, sc, SC, g, G, V, vo, Vo, ...) produce no entry.
     *
     * @param Page|null $page page used to resolve font and XObject ids; without it the
     *                        Tf and Do operators are ignored
     *
     * @return string[] decoded text fragments in stream order
     *
     * @throws \Exception
     */
    public function getTextArray(?Page $page = null): array
    {
        $text = [];
        $sections = $this->getSectionsText($this->content);
        $current_font = new Font($this->document, null, null, $this->config);

        foreach ($sections as $section) {
            $commands = $this->getCommandsText($section);

            foreach ($commands as $command) {
                switch ($command[self::OPERATOR]) {
                    case 'Tf':
                        if (null !== $page) {
                            list($id) = preg_split('/\s/s', $command[self::COMMAND]);
                            $id = trim($id, '/');
                            $new_font = $page->getFont($id);
                            // Same guard as getText(): an unknown font id keeps the current font
                            // instead of leaving null behind for the next decodeText() call.
                            if (null !== $new_font) {
                                $current_font = $new_font;
                            }
                        }
                        break;

                    case "'":
                    case 'Tj':
                        // Wrap the single string so it can be decoded like a TJ array.
                        $command[self::COMMAND] = [$command];
                        // no break
                    case 'TJ':
                        $text[] = $current_font->decodeText($command[self::COMMAND]);
                        break;

                    case 'Do':
                        if (null !== $page) {
                            $args = preg_split('/\s/s', $command[self::COMMAND]);
                            $id = trim(array_pop($args), '/ ');
                            $xobject = $page->getXObject($id);
                            // Only PDFObject instances are known here to offer getText();
                            // anything else is skipped rather than called blindly.
                            if ($xobject instanceof self) {
                                $text[] = $xobject->getText($page);
                            }
                        }
                        break;

                    default:
                        // Every other operator is ignored.
                }
            }
        }

        return $text;
    }
613
614 27
    /**
     * Tokenises a text section into a list of commands.
     *
     * Parsing starts at $offset and stops at the end of the string or at the first position
     * where nothing recognisable is found (which includes "]", ">" and "ET"). Arrays
     * introduced by "[" are parsed recursively and become the command value of their entry.
     *
     * @param string $text_part section to parse
     * @param int    $offset    byte position to start from; updated to the position where parsing stopped
     *
     * @return array[] one array per command with the keys self::TYPE (leading character,
     *                 'n' for bare numbers, '' otherwise), self::OPERATOR (operator name,
     *                 possibly '') and self::COMMAND (operand string, or a nested command
     *                 list for "[")
     */
    public function getCommandsText(string $text_part, int &$offset = 0): array
    {
        $commands = $matches = [];
        $length = \strlen($text_part);

        while ($offset < $length) {
            $offset += strspn($text_part, "\x00\x09\x0a\x0c\x0d\x20", $offset);
            if ($offset >= $length) {
                // Only whitespace was left; reading $text_part[$offset] would be out of range.
                break;
            }
            $char = $text_part[$offset];

            $operator = '';
            $type = '';
            $command = false;

            switch ($char) {
                case '/':
                    $type = $char;
                    if (preg_match(
                        '/^\/([A-Z0-9\._,\+]+\s+[0-9.\-]+)\s+([A-Z]+)\s*/si',
                        substr($text_part, $offset),
                        $matches
                    )
                    ) {
                        $operator = $matches[2];
                        $command = $matches[1];
                        $offset += \strlen($matches[0]);
                    } elseif (preg_match(
                        '/^\/([A-Z0-9\._,\+]+)\s+([A-Z]+)\s*/si',
                        substr($text_part, $offset),
                        $matches
                    )
                    ) {
                        $operator = $matches[2];
                        $command = $matches[1];
                        $offset += \strlen($matches[0]);
                    }
                    break;

                case '[':
                    // array object
                    $type = $char;
                    ++$offset;
                    // get elements
                    $command = $this->getCommandsText($text_part, $offset);

                    if (preg_match('/^\s*[A-Z]{1,2}\s*/si', substr($text_part, $offset), $matches)) {
                        $operator = trim($matches[0]);
                        $offset += \strlen($matches[0]);
                    }
                    break;

                case ']':
                    // End of the array being parsed: $command stays false, which ends this call.
                    $type = $char;
                    ++$offset;
                    break;

                case '<':
                case '>':
                    $type = $char;
                    ++$offset;
                    if ('<' === $char) {
                        $strpos = strpos($text_part, '>', $offset);
                        if (false === $strpos) {
                            // Unterminated "<...": take the remainder. Treating false as 0 used to
                            // reset $offset to 1, which re-parsed the same "<" forever.
                            $command = (string) substr($text_part, $offset);
                            $offset = $length;
                        } else {
                            $command = substr($text_part, $offset, $strpos - $offset);
                            $offset = $strpos + 1;
                        }
                    }

                    if (preg_match('/^\s*[A-Z]{1,2}\s*/si', substr($text_part, $offset), $matches)) {
                        $operator = trim($matches[0]);
                        $offset += \strlen($matches[0]);
                    }
                    break;

                case '(':
                case ')':
                    ++$offset;
                    $type = $char;
                    $strpos = $offset;
                    if ('(' === $char) {
                        $open_bracket = 1;
                        while ($open_bracket > 0) {
                            if (!isset($text_part[$strpos])) {
                                break;
                            }
                            $ch = $text_part[$strpos];
                            switch ($ch) {
                                case '\\':
                                    // REVERSE SOLIDUS (5Ch) (Backslash)
                                    // skip next character
                                    ++$strpos;
                                    break;

                                case '(':
                                    // LEFT PARENTHESIS (28h)
                                    ++$open_bracket;
                                    break;

                                case ')':
                                    // RIGHT PARENTHESIS (29h)
                                    --$open_bracket;
                                    break;
                            }
                            ++$strpos;
                        }
                        $command = substr($text_part, $offset, ($strpos - $offset - 1));
                        $offset = $strpos;

                        if (preg_match('/^\s*([A-Z\']{1,2})\s*/si', substr($text_part, $offset), $matches)) {
                            $operator = $matches[1];
                            $offset += \strlen($matches[0]);
                        }
                    }
                    break;

                default:
                    if ('ET' == substr($text_part, $offset, 2)) {
                        break;
                    } elseif (preg_match(
                        '/^\s*(?P<data>([0-9\.\-]+\s*?)+)\s+(?P<id>[A-Z]{1,3})\s*/si',
                        substr($text_part, $offset),
                        $matches
                    )
                    ) {
                        $operator = trim($matches['id']);
                        $command = trim($matches['data']);
                        $offset += \strlen($matches[0]);
                    } elseif (preg_match('/^\s*([0-9\.\-]+\s*?)+\s*/si', substr($text_part, $offset), $matches)) {
                        $type = 'n';
                        $command = trim($matches[0]);
                        $offset += \strlen($matches[0]);
                    } elseif (preg_match('/^\s*([A-Z\*]+)\s*/si', substr($text_part, $offset), $matches)) {
                        $type = '';
                        $operator = $matches[1];
                        $command = '';
                        $offset += \strlen($matches[0]);
                    }
            }

            if (false === $command) {
                // Nothing recognisable at this position: stop instead of spinning on it.
                break;
            }

            $commands[] = [
                self::TYPE => $type,
                self::OPERATOR => $operator,
                self::COMMAND => $command,
            ];
        }

        return $commands;
    }
764
765 40
    /**
     * Creates the most specific object class for a parsed PDF object.
     *
     * The class is chosen from the header's "Type" entry, refined by "Subtype" for XObject
     * and Font. Unknown types, and XObjects with an unknown subtype, become a plain PDFObject.
     *
     * @param Document    $document document handed to the created object
     * @param Header      $header   header whose Type / Subtype entries drive the choice
     * @param string|null $content  raw content of the object
     * @param Config|null $config   optional configuration; when it is missing, image content is kept
     */
    public static function factory(
        Document $document,
        Header $header,
        ?string $content,
        ?Config $config = null
    ): self {
        switch ($header->get('Type')->getContent()) {
            case 'XObject':
                switch ($header->get('Subtype')->getContent()) {
                    case 'Image':
                        // $config is optional: asking null for getRetainImageContent() is a fatal
                        // error, so without a config nothing is discarded.
                        $retainContent = null === $config || $config->getRetainImageContent();

                        return new Image($document, $header, $retainContent ? $content : null, $config);

                    case 'Form':
                        return new Form($document, $header, $content, $config);
                }

                return new self($document, $header, $content, $config);

            case 'Pages':
                return new Pages($document, $header, $content, $config);

            case 'Page':
                return new Page($document, $header, $content, $config);

            case 'Encoding':
                return new Encoding($document, $header, $content, $config);

            case 'Font':
                // Font subtypes map onto classes named Font<Subtype>; unknown ones use the generic Font.
                $subtype = $header->get('Subtype')->getContent();
                $classname = '\Smalot\PdfParser\Font\Font'.$subtype;

                if (class_exists($classname)) {
                    return new $classname($document, $header, $content, $config);
                }

                return new Font($document, $header, $content, $config);

            default:
                return new self($document, $header, $content, $config);
        }
    }
806
807
    /**
808
     * Returns unique id identifying the object.
809
     */
810 18
    protected function getUniqueId(): string
811
    {
812 18
        // The id is the object hash of this instance; getText() stores it in self::$recursionStack.
        return spl_object_hash($this);
813
    }
814
}
815