Passed
Pull Request — master (#318)
by
unknown
04:43
created

PDFObject::getCommandsText()   F

Complexity

Conditions 27
Paths 65

Size

Total Lines 150
Code Lines 105

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 95
CRAP Score 27.0911

Importance

Changes 0
Metric Value
cc 27
eloc 105
c 0
b 0
f 0
nc 65
nop 2
dl 0
loc 150
ccs 95
cts 100
cp 0.95
crap 27.0911
rs 3.3333

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method.

1
<?php
2
3
/**
4
 * @file
5
 *          This file is part of the PdfParser library.
6
 *
7
 * @author  Sébastien MALOT <[email protected]>
8
 * @date    2017-01-03
9
 *
10
 * @license LGPLv3
11
 * @url     <https://github.com/smalot/pdfparser>
12
 *
13
 *  PdfParser is a pdf library written in PHP, extraction oriented.
14
 *  Copyright (C) 2017 - Sébastien MALOT <[email protected]>
15
 *
16
 *  This program is free software: you can redistribute it and/or modify
17
 *  it under the terms of the GNU Lesser General Public License as published by
18
 *  the Free Software Foundation, either version 3 of the License, or
19
 *  (at your option) any later version.
20
 *
21
 *  This program is distributed in the hope that it will be useful,
22
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
23
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
24
 *  GNU Lesser General Public License for more details.
25
 *
26
 *  You should have received a copy of the GNU Lesser General Public License
27
 *  along with this program.
28
 *  If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
29
 */
30
31
namespace Smalot\PdfParser;
32
33
use Smalot\PdfParser\XObject\Form;
34
use Smalot\PdfParser\XObject\Image;
35
36
/**
37
 * Class PDFObject
38
 */
39
class PDFObject
40
{
41
    /** Array key under which a parsed command stores its token-type marker. */
    const TYPE = 't';
42
43
    /** Array key under which a parsed command stores its operator. */
    const OPERATOR = 'o';
44
45
    /** Array key under which a parsed command stores its operand(s). */
    const COMMAND = 'c';
46
47
    /**
48
     * The recursion stack.
49
     *
     * Holds the unique ids of the objects whose getText() call is currently
     * running; consulted before descending into an XObject so that circular
     * references are not followed.
     *
50
     * @var array
51
     */
52
    public static $recursionStack = [];
53
54
    /**
     * Document this object belongs to (set by the constructor).
     *
55
     * @var Document
56
     */
57
    protected $document = null;
58
59
    /**
     * Header of the object; the constructor always assigns a Header instance.
     *
60
     * @var Header
61
     */
62
    protected $header = null;
63
64
    /**
     * Raw content of the object, or null when none was given.
     *
65
     * @var string
66
     */
67
    protected $content = null;
68
69
    /**
     * Binds the object to its document and stores its header and content.
     *
     * @param Document    $document document the object belongs to
     * @param Header|null $header   header of the object; when null, a Header
     *                              built without arguments is used instead
     * @param string|null $content  raw content of the object
     */
    public function __construct(Document $document, Header $header = null, $content = null)
    {
        if (null === $header) {
            $header = new Header();
        }

        $this->content = $content;
        $this->header = $header;
        $this->document = $document;
    }
79
80 29
    /**
     * Does nothing in this class.
     *
     * NOTE(review): presumably an initialisation hook meant to be overridden
     * by subclasses - confirm against the callers.
     */
    public function init()
    {
        // Intentionally empty.
    }
83
84
    /**
     * Gives access to the header stored on this object.
     *
     * @return Header|null
     */
    public function getHeader()
    {
        $header = $this->header;

        return $header;
    }
91
92
    /**
     * Looks up a header entry by name (delegates to Header::get()).
     *
     * @param string $name name of the header entry
     *
     * @return Element|PDFObject
     */
    public function get($name)
    {
        $header = $this->header;

        return $header->get($name);
    }
101
102
    /**
     * Tells whether the header holds an entry with the given name
     * (delegates to Header::has()).
     *
     * @param string $name name of the header entry
     *
     * @return bool
     */
    public function has($name)
    {
        $header = $this->header;

        return $header->has($name);
    }
111
112
    /**
     * Returns the header details (delegates to Header::getDetails()).
     *
     * @param bool $deep forwarded unchanged to Header::getDetails()
     *
     * @return array
     */
    public function getDetails($deep = true)
    {
        $header = $this->header;

        return $header->getDetails($deep);
    }
121
122
    /**
     * Returns the raw content given to the constructor.
     *
     * @return string|null
     */
    public function getContent()
    {
        $content = $this->content;

        return $content;
    }
129
130
    /**
     * Masks the parts of a content stream that are not operators.
     *
     * Escaped backslashes/parentheses, inline image blocks (BI ... ID ... EI),
     * the inside of [...] and (...) operands, everything between < and >, and
     * the BDC / EMC markers are overwritten with $char. Every replacement has
     * the same byte length as the text it replaces, so an offset found in the
     * returned string is also valid in the original $content.
     *
     * @param string $content content stream to mask
     * @param string $char    mask character; only its first byte is used, and
     *                        'X' is used when it is empty
     *
     * @return string masked copy of $content, same length as the input
     */
    public function cleanContent($content, $char = 'X')
    {
        // The mask must be exactly one byte wide, otherwise offsets would shift.
        $char = (string) $char;
        $char = '' === $char ? 'X' : $char[0];

        // Escaped backslashes and parentheses would unbalance the bracket
        // patterns below, so they are masked first (two bytes for two bytes).
        $content = str_replace(['\\\\', '\\)', '\\('], $char.$char, $content);

        // Remove image block with binary content
        $content = $this->maskMatches($content, '/\s(BI\s.*?(\sID\s).*?(\sEI))\s/s', 0, $char);

        // Clean content in square brackets [.....]
        $content = $this->maskMatches($content, '/\[((\(.*?\)|[0-9\.\-\s]*)*)\]/s', 1, $char);

        // Clean content in round brackets (.....)
        $content = $this->maskMatches($content, '/\((.*?)\)/s', 1, $char);

        // Clean structure: everything from a '<' up to its matching '>' is
        // masked, delimiters included.
        $parts = preg_split('/(<|>)/s', $content, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE);
        if ($parts) {
            $content = '';
            $level = 0;
            foreach ($parts as $part) {
                if ('<' === $part) {
                    ++$level;
                }

                $content .= (0 === $level ? $part : str_repeat($char, \strlen($part)));

                if ('>' === $part) {
                    --$level;
                }
            }
        }

        // Clean BDC and EMC markup. The mask character is quoted together with
        // the '/' delimiter so that any mask (including '/') yields a valid pattern.
        $content = $this->maskMatches(
            $content,
            '/(\/[A-Za-z0-9\_]*\s*'.preg_quote($char, '/').'*BDC)/s',
            1,
            $char
        );
        $content = $this->maskMatches($content, '/\s(EMC)\s/s', 1, $char);

        return $content;
    }

    /**
     * Overwrites every occurrence of one capture group of $pattern with $char.
     *
     * @param string $content text to search and mask
     * @param string $pattern PCRE pattern
     * @param int    $group   index of the capture group to overwrite (0 = whole match)
     * @param string $char    single-byte mask character
     *
     * @return string $content with the captured spans masked, length unchanged
     */
    private function maskMatches($content, $pattern, $group, $char)
    {
        if (!preg_match_all($pattern, $content, $matches, PREG_OFFSET_CAPTURE) || !isset($matches[$group])) {
            return $content;
        }

        // All offsets come from the same scan; they stay valid because each
        // replacement keeps the length of the span it overwrites.
        foreach ($matches[$group] as $part) {
            $length = \strlen($part[0]);
            $content = substr_replace($content, str_repeat($char, $length), $part[1], $length);
        }

        return $content;
    }
191
192
    /**
     * Cuts a content stream into the sections that matter for text extraction.
     *
     * The stream is padded with one space on each side and masked with
     * cleanContent(); the BT ... ET bodies and the "/Name Do" commands are
     * located in the masked copy and then sliced out of the padded original
     * at the same offsets. All text sections come first, followed by all Do
     * sections.
     *
     * @param string $content content stream
     *
     * @return array list of section strings
     */
    public function getSectionsText($content)
    {
        $padded = ' '.$content.' ';
        $masked = $this->cleanContent($padded, '_');
        $found = [];

        // Text blocks: body of every BT ... ET pair.
        $hasTextBlocks = preg_match_all('/\s+BT[\s|\(|\[]+(.*?)\s*ET/s', $masked, $blocks, PREG_OFFSET_CAPTURE);
        if ($hasTextBlocks) {
            foreach ($blocks[1] as $block) {
                list($maskedBody, $position) = $block;

                if ('' !== $maskedBody) {
                    $body = substr($padded, $position, \strlen($maskedBody));

                    // Drop the BDC ... EMC wrapper and keep only what it encloses.
                    $found[] = preg_replace(
                        '/(\/[A-Za-z0-9]+\s*<<.*?)(>>\s*BDC)(.*?)(EMC\s+)/s',
                        '${3}',
                        $body.' '
                    );
                }
            }
        }

        // 'Do' commands.
        $hasDoCommands = preg_match_all('/(\/[A-Za-z0-9\.\-_]+\s+Do)\s/s', $masked, $calls, PREG_OFFSET_CAPTURE);
        if ($hasDoCommands) {
            foreach ($calls[1] as $call) {
                $found[] = substr($padded, $call[1], \strlen($call[0]));
            }
        }

        return $found;
    }
233
234
    /**
     * Extracts the text of this object's content.
     *
     * The content is cut into sections (getSectionsText()), each section is
     * parsed into commands (getCommandsText()), and the text-showing operators
     * (', Tj, TJ) are decoded with the current font. Positioning operators
     * (Td, TD, Tm, TL, Tz, T*) insert spaces, tabs or line breaks; "Do"
     * recurses into the referenced XObject unless that object is already
     * being processed. All other operators are ignored.
     *
     * @param Page|null $page page used to resolve fonts (Tf) and XObjects (Do);
     *                        when null the initial font is kept and Do is skipped
     *
     * @return string extracted text followed by one space
     *
     * @throws \Exception
     */
    public function getText(Page $page = null)
    {
        $text = '';
        $sections = $this->getSectionsText($this->content);
        $current_font = null;
        $current_font_size = 1;

        // Start with the first font of the document; a Tf operator replaces it.
        foreach ($this->document->getObjects() as $obj) {
            if ($obj instanceof Font) {
                $current_font = $obj;
                break;
            }
        }

        if (null === $current_font) {
            $current_font = new Font($this->document);
        }

        $current_position_td = ['x' => false, 'y' => false];
        $current_position_tm = ['x' => false, 'y' => false];

        // preg_split() returns false on a PCRE failure; normalise to a list so
        // that array_pop() always receives an array.
        $splitOperands = static function ($operands) {
            $parts = preg_split('/\s/s', $operands);

            return false === $parts ? [] : $parts;
        };

        array_push(self::$recursionStack, $this->getUniqueId());

        try {
            foreach ($sections as $section) {
                $commands = $this->getCommandsText($section);

                foreach ($commands as $command) {
                    switch ($command[self::OPERATOR]) {
                        // move text current point
                        case 'Td':
                            $args = $splitOperands($command[self::COMMAND]);
                            $y = array_pop($args);
                            $x = array_pop($args);

                            /*
                             * When converting pdf into text, some offsets between texts should at least be
                             * output as spaces. But small move-text operations may be normal,
                             * see https://github.com/smalot/pdfparser/issues/201
                             *
                             * To find out which case this is, get the max char width for the current font
                             * and check whether the offset is bigger than that.
                             */
                            $fontMaxWidth = static::getMaxCharWidthFromFont($current_font, $current_font_size);

                            $looksLikeCarriageReturn = (float) $x <= 0;

                            $movingVertically = false !== $current_position_td['y']
                                && (float) $y < (float) $current_position_td['y'];

                            $lookLikeSpaceChar = ((float) $x - (float) $current_position_td['x']) > $fontMaxWidth;

                            if ($looksLikeCarriageReturn || $movingVertically) {
                                // vertical offset
                                $text .= "\n";
                            } elseif (false !== $current_position_td['x'] && $lookLikeSpaceChar) {
                                // horizontal offset
                                $text .= ' ';
                            }

                            $current_position_td = ['x' => $x, 'y' => $y];
                            break;

                        // move text current point and set leading
                        case 'TD':
                            $args = $splitOperands($command[self::COMMAND]);
                            $y = array_pop($args);
                            $x = array_pop($args);
                            if ((float) $y < 0) {
                                $text .= "\n";
                            } elseif ((float) $x <= 0) {
                                $text .= ' ';
                            }
                            break;

                        // select font and size
                        case 'Tf':
                            $args = $splitOperands($command[self::COMMAND]);
                            // A missing size operand yields 0.0, without an undefined-offset notice.
                            $current_font_size = (float) (isset($args[1]) ? $args[1] : 0);
                            $id = trim(isset($args[0]) ? (string) $args[0] : '', '/');
                            if (null !== $page) {
                                $current_font = $page->getFont($id);
                            }
                            break;

                        case "'":
                        case 'Tj':
                            // Wrap the single string so it has the same shape as a TJ array.
                            $command[self::COMMAND] = [$command];
                            // no break
                        case 'TJ':
                            // No usable font (the page lookup in Tf may have returned none):
                            // fall back to the raw operand of the first element.
                            // TODO : Improve
                            if (null === $current_font) {
                                $text .= $command[self::COMMAND][0][self::COMMAND];
                                break;
                            }

                            $text .= $current_font->decodeText($command[self::COMMAND]);
                            break;

                        // set leading
                        case 'TL':
                            $text .= ' ';
                            break;

                        // set text matrix
                        case 'Tm':
                            $args = $splitOperands($command[self::COMMAND]);
                            $y = array_pop($args);
                            $x = array_pop($args);
                            if (false !== $current_position_tm['x']) {
                                $delta = abs((float) $x - (float) $current_position_tm['x']);
                                if ($delta > 10) {
                                    $text .= "\t";
                                }
                            }
                            if (false !== $current_position_tm['y']) {
                                $delta = abs((float) $y - (float) $current_position_tm['y']);
                                if ($delta > 10) {
                                    $text .= "\n";
                                }
                            }
                            $current_position_tm = ['x' => $x, 'y' => $y];
                            break;

                        // set horizontal scaling
                        case 'Tz':
                        // move to start of next line
                        case 'T*':
                            $text .= "\n";
                            break;

                        case 'Do':
                            if (null !== $page) {
                                $args = $splitOperands($command[self::COMMAND]);
                                $id = trim((string) array_pop($args), '/ ');
                                $xobject = $page->getXObject($id);

                                // Anything that is not a PDFObject (e.g. an unresolved reference) is
                                // skipped; the strict stack lookup rejects circular references.
                                if ($xobject instanceof self
                                    && !\in_array($xobject->getUniqueId(), self::$recursionStack, true)
                                ) {
                                    $text .= $xobject->getText($page);
                                }
                            }
                            break;

                        // Tc, Ts, Tw, Da, rg, RG, re, co, cs, gs, en, sc, SC, g, G, V, vo, Vo
                        // and any unknown operator contribute no text.
                        default:
                    }
                }
            }
        } finally {
            // Always unwind, otherwise a failed decode would leave this object
            // on the static stack and later calls would treat it as circular.
            array_pop(self::$recursionStack);
        }

        return $text.' ';
    }
446
447
    /**
     * Extracts the text of this object's content as a list of fragments.
     *
     * Works like getText() but appends one array entry per text-showing
     * operator (', Tj, TJ) and per "Do" XObject, and ignores all positioning
     * operators.
     *
     * @param Page|null $page page used to resolve fonts (Tf) and XObjects (Do);
     *                        when null the current font is kept and Do is skipped
     *
     * @return array list of decoded text fragments
     *
     * @throws \Exception
     */
    public function getTextArray(Page $page = null)
    {
        $text = [];
        $sections = $this->getSectionsText($this->content);
        $current_font = new Font($this->document);

        foreach ($sections as $section) {
            $commands = $this->getCommandsText($section);

            foreach ($commands as $command) {
                switch ($command[self::OPERATOR]) {
                    // select font
                    case 'Tf':
                        // Without a page there is nothing to look the font up in.
                        if (null !== $page) {
                            $args = preg_split('/\s/s', $command[self::COMMAND]);
                            $id = trim(false !== $args && isset($args[0]) ? (string) $args[0] : '', '/');
                            $current_font = $page->getFont($id);
                        }
                        break;

                    case "'":
                    case 'Tj':
                        // Wrap the single string so it has the same shape as a TJ array.
                        $command[self::COMMAND] = [$command];
                        // no break
                    case 'TJ':
                        // No usable font (the page lookup in Tf may have returned none):
                        // fall back to the raw operand of the first element.
                        // TODO : Improve
                        if (null === $current_font) {
                            $text[] = $command[self::COMMAND][0][self::COMMAND];
                            break;
                        }

                        $text[] = $current_font->decodeText($command[self::COMMAND]);
                        break;

                    case 'Do':
                        if (null !== $page) {
                            $args = preg_split('/\s/s', $command[self::COMMAND]);
                            $id = trim(false === $args ? '' : (string) array_pop($args), '/ ');
                            $xobject = $page->getXObject($id);

                            // Same guard as getText(): only PDFObject instances provide getText().
                            if ($xobject instanceof self) {
                                $text[] = $xobject->getText($page);
                            }
                        }
                        break;

                    // Tc, Td, TD, TL, Tm, Ts, Tw, Tz, T*, Da, rg, RG, re, co, cs, gs, en,
                    // sc, SC, g, G, V, vo, Vo and any unknown operator contribute no text.
                    default:
                }
            }
        }

        return $text;
    }
579
580
    /**
     * Parses a section of a content stream into a list of commands.
     *
     * Each command is an array with three keys: self::TYPE (the character that
     * introduced the operand: '/', '[', '<', '(', 'n' for a bare number list,
     * or ''), self::OPERATOR (the operator that follows the operand, '' when
     * none was found) and self::COMMAND (the operand: a string, or for '[' the
     * nested list of commands). Parsing stops at the end of the string, at a
     * closing ']' (which ends a nested array), at "ET", or at the first token
     * that cannot be recognised.
     *
     * @param string $text_part section to parse
     * @param int    $offset    byte offset to start at; updated to the position
     *                          where parsing stopped
     *
     * @return array list of commands
     */
    public function getCommandsText($text_part, &$offset = 0)
    {
        $commands = $matches = [];
        $length = \strlen($text_part);

        while ($offset < $length) {
            $offset += strspn($text_part, "\x00\x09\x0a\x0c\x0d\x20", $offset);
            if ($offset >= $length) {
                // Only trailing whitespace was left.
                break;
            }
            $char = $text_part[$offset];

            $operator = '';
            $type = '';
            $command = false;

            switch ($char) {
                case '/':
                    // name operand, optionally followed by a number ("/F1 12 Tf")
                    $type = $char;
                    $rest = substr($text_part, $offset);
                    if (preg_match('/^\/([A-Z0-9\._,\+]+\s+[0-9.\-]+)\s+([A-Z]+)\s*/si', $rest, $matches)
                        || preg_match('/^\/([A-Z0-9\._,\+]+)\s+([A-Z]+)\s*/si', $rest, $matches)
                    ) {
                        $operator = $matches[2];
                        $command = $matches[1];
                        $offset += \strlen($matches[0]);
                    }
                    break;

                case '[':
                case ']':
                    // array object
                    $type = $char;
                    ++$offset;
                    if ('[' == $char) {
                        // get elements; the nested call returns at the matching ']'
                        $command = $this->getCommandsText($text_part, $offset);

                        if (preg_match('/^\s*[A-Z]{1,2}\s*/si', substr($text_part, $offset), $matches)) {
                            $operator = trim($matches[0]);
                            $offset += \strlen($matches[0]);
                        }
                    }
                    // For ']' $command stays false, which ends this (nested) parse.
                    break;

                case '<':
                case '>':
                    // hexadecimal string
                    $type = $char;
                    ++$offset;
                    if ('<' == $char) {
                        $strpos = strpos($text_part, '>', $offset);
                        if (false === $strpos) {
                            // Unterminated: take the rest. Using false as a position
                            // would reset $offset to 1 and could loop forever.
                            $command = (string) substr($text_part, $offset);
                            $offset = $length;
                        } else {
                            $command = substr($text_part, $offset, ($strpos - $offset));
                            $offset = $strpos + 1;
                        }
                    }

                    if (preg_match('/^\s*[A-Z]{1,2}\s*/si', substr($text_part, $offset), $matches)) {
                        $operator = trim($matches[0]);
                        $offset += \strlen($matches[0]);
                    }
                    break;

                case '(':
                case ')':
                    // literal string
                    ++$offset;
                    $type = $char;
                    $strpos = $offset;
                    if ('(' == $char) {
                        $open_bracket = 1;
                        while ($open_bracket > 0) {
                            if (!isset($text_part[$strpos])) {
                                break;
                            }
                            switch ($text_part[$strpos]) {
                                case '\\':
                                    // REVERSE SOLIDUS (5Ch) (Backslash): skip next character
                                    ++$strpos;
                                    break;

                                case '(':
                                    // LEFT PARENTHESIS (28h)
                                    ++$open_bracket;
                                    break;

                                case ')':
                                    // RIGHT PARENTHESIS (29h)
                                    --$open_bracket;
                                    break;
                            }
                            ++$strpos;
                        }

                        if ($open_bracket > 0) {
                            // Unterminated: there is no closing parenthesis to
                            // strip, so keep everything up to the end.
                            $command = (string) substr($text_part, $offset);
                            $offset = $length;
                        } else {
                            $command = substr($text_part, $offset, ($strpos - $offset - 1));
                            $offset = $strpos;
                        }

                        if (preg_match('/^\s*([A-Z\']{1,2})\s*/si', substr($text_part, $offset), $matches)) {
                            $operator = $matches[1];
                            $offset += \strlen($matches[0]);
                        }
                    }
                    break;

                default:
                    $rest = substr($text_part, $offset);
                    if ('ET' == substr($text_part, $offset, 2)) {
                        break;
                    } elseif (preg_match(
                        '/^\s*(?P<data>([0-9\.\-]+\s*?)+)\s+(?P<id>[A-Z]{1,3})\s*/si',
                        $rest,
                        $matches
                    )
                    ) {
                        // numbers followed by an operator ("1 0 0 1 10 20 Tm")
                        $operator = trim($matches['id']);
                        $command = trim($matches['data']);
                        $offset += \strlen($matches[0]);
                    } elseif (preg_match('/^\s*([0-9\.\-]+\s*?)+\s*/si', $rest, $matches)) {
                        // bare numbers (e.g. inside a TJ array)
                        $type = 'n';
                        $command = trim($matches[0]);
                        $offset += \strlen($matches[0]);
                    } elseif (preg_match('/^\s*([A-Z\*]+)\s*/si', $rest, $matches)) {
                        // operator without operand ("T*")
                        $type = '';
                        $operator = $matches[1];
                        $command = '';
                        $offset += \strlen($matches[0]);
                    }
            }

            if (false === $command) {
                // Nothing recognised at this position: stop parsing.
                break;
            }

            $commands[] = [
                self::TYPE => $type,
                self::OPERATOR => $operator,
                self::COMMAND => $command,
            ];
        }

        return $commands;
    }
737
738
    /**
     * Creates the object class that matches the header's Type (and Subtype).
     *
     * XObject yields an Image or Form depending on Subtype, Pages / Page /
     * Encoding yield the class of the same name, Font yields
     * \Smalot\PdfParser\Font\Font<Subtype> when such a class exists and a
     * plain Font otherwise. Everything else yields a plain PDFObject.
     *
     * @param Document $document document the object belongs to
     * @param Header   $header   header of the object
     * @param string   $content  raw content of the object
     *
     * @return PDFObject
     */
    public static function factory(Document $document, Header $header, $content)
    {
        $type = $header->get('Type')->getContent();

        // Loose comparisons on purpose: they mirror switch semantics.
        if ('XObject' == $type) {
            $subtype = $header->get('Subtype')->getContent();

            if ('Image' == $subtype) {
                return new Image($document, $header, $content);
            }
            if ('Form' == $subtype) {
                return new Form($document, $header, $content);
            }

            return new self($document, $header, $content);
        }

        if ('Pages' == $type) {
            return new Pages($document, $header, $content);
        }

        if ('Page' == $type) {
            return new Page($document, $header, $content);
        }

        if ('Encoding' == $type) {
            return new Encoding($document, $header, $content);
        }

        if ('Font' == $type) {
            $fontClass = '\Smalot\PdfParser\Font\Font'.$header->get('Subtype')->getContent();

            return class_exists($fontClass)
                ? new $fontClass($document, $header, $content)
                : new Font($document, $header, $content);
        }

        return new self($document, $header, $content);
    }
780
781
    /**
     * Returns an id identifying this object instance (its spl_object_hash()).
     *
     * @return string
     */
    protected function getUniqueId()
    {
        $hash = spl_object_hash($this);

        return $hash;
    }
790
791
    /**
     * Returns the width of the widest character of a font, scaled to the
     * given font size.
     *
     * The width is taken from the font details: the largest entry of
     * "Widths" divided by 1000 and multiplied by the font size. For a font
     * whose Type is CIDFontType2 the "DW" entry is used the same way, or the
     * font size itself when DW is absent or empty. When the details hold
     * "DescendantFonts", its first entry is inspected instead of the font's
     * own details.
     *
     * @param Font|null $currentFont     font to inspect
     * @param float     $currentFontSize current font size
     *
     * @return float|int 0 when no font is given or no width information is found
     */
    protected static function getMaxCharWidthFromFont($currentFont, $currentFontSize)
    {
        if (null === $currentFont) {
            return 0;
        }

        $fontMaxWidth = 0;
        $fontDictionary = $currentFont->getDetails();

        // type 0
        if (isset($fontDictionary['DescendantFonts'])) {
            $fontDictionary = isset($fontDictionary['DescendantFonts'][0])
                ? $fontDictionary['DescendantFonts'][0]
                : [];
        }

        // type 1
        if (isset($fontDictionary['Widths'])) {
            if (\is_array($fontDictionary['Widths'])) {
                foreach ($fontDictionary['Widths'] as $width) {
                    $floatWidth = (float) $width;
                    if ($floatWidth > $fontMaxWidth) {
                        $fontMaxWidth = $floatWidth;
                    }
                }
            }

            $fontMaxWidth = ($fontMaxWidth / 1000) * $currentFontSize;
        }

        // CIDFontType2 (the Type entry may be missing altogether)
        if (isset($fontDictionary['Type'])
            && \is_string($fontDictionary['Type'])
            && 'cidfonttype2' === strtolower($fontDictionary['Type'])
        ) {
            if (isset($fontDictionary['DW']) && $fontDictionary['DW']) {
                // defaultWidth / maxWidth for CIDFonts
                $fontMaxWidth = ((float) $fontDictionary['DW'] / 1000) * $currentFontSize;
            } else {
                // default
                $fontMaxWidth = $currentFontSize;
            }
        }

        return $fontMaxWidth;
    }
830
}
831