Passed
Push — master ( fdbbb5...1f4056 )
by Konrad
02:33
created

PDFObject::cleanContent()   B

Complexity

Conditions 11
Paths 64

Size

Total Lines 57
Code Lines 31

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 29
CRAP Score 11.0044

Importance

Changes 0
Metric Value
cc 11
eloc 31
c 0
b 0
f 0
nc 64
nop 2
dl 0
loc 57
ccs 29
cts 30
cp 0.9667
crap 11.0044
rs 7.3166

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
/**
4
 * @file
5
 *          This file is part of the PdfParser library.
6
 *
7
 * @author  Sébastien MALOT <[email protected]>
8
 * @date    2017-01-03
9
 *
10
 * @license LGPLv3
11
 * @url     <https://github.com/smalot/pdfparser>
12
 *
13
 *  PdfParser is a pdf library written in PHP, extraction oriented.
14
 *  Copyright (C) 2017 - Sébastien MALOT <[email protected]>
15
 *
16
 *  This program is free software: you can redistribute it and/or modify
17
 *  it under the terms of the GNU Lesser General Public License as published by
18
 *  the Free Software Foundation, either version 3 of the License, or
19
 *  (at your option) any later version.
20
 *
21
 *  This program is distributed in the hope that it will be useful,
22
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
23
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
24
 *  GNU Lesser General Public License for more details.
25
 *
26
 *  You should have received a copy of the GNU Lesser General Public License
27
 *  along with this program.
28
 *  If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
29
 */
30
31
namespace Smalot\PdfParser;
32
33
use Smalot\PdfParser\XObject\Form;
34
use Smalot\PdfParser\XObject\Image;
35
36
/**
37
 * Class PDFObject
38
 */
39
class PDFObject
40
{
41
    const TYPE = 't';
42
43
    const OPERATOR = 'o';
44
45
    const COMMAND = 'c';
46
47
    /**
48
     * The recursion stack.
49
     *
50
     * @var array
51
     */
52
    public static $recursionStack = [];
53
54
    /**
55
     * @var Document
56
     */
57
    protected $document = null;
58
59
    /**
60
     * @var Header
61
     */
62
    protected $header = null;
63
64
    /**
65
     * @var string
66
     */
67
    protected $content = null;
68
69
    /**
     * Stores the owning document, the header and the raw content on the object.
     *
     * @param Document    $document document kept in $this->document
     * @param Header|null $header   header to keep; when null a new Header() is created instead
     * @param string|null $content  content kept as-is in $this->content
     */
    public function __construct(Document $document, Header $header = null, $content = null)
    {
        // Guarantee that $this->header is always an object after construction.
        if (null === $header) {
            $header = new Header();
        }

        $this->document = $document;
        $this->header = $header;
        $this->content = $content;
    }
79
80 30
    /**
     * Does nothing in this class.
     *
     * NOTE(review): presumably an initialisation hook meant to be overridden by
     * subclasses - confirm against the callers.
     */
    public function init()
    {
        // Nothing to initialise here.
    }
83
84
    /**
     * Returns the header stored on this object.
     *
     * @return Header|null
     */
    public function getHeader()
    {
        $header = $this->header;

        return $header;
    }
91
92
    /**
     * Looks up a header entry by name; the call is delegated to Header::get().
     *
     * @param string $name
     *
     * @return Element|PDFObject
     */
    public function get($name)
    {
        $entry = $this->header->get($name);

        return $entry;
    }
101
102
    /**
     * Tells whether the header has an entry with the given name; delegated to Header::has().
     *
     * @param string $name
     *
     * @return bool
     */
    public function has($name)
    {
        $present = $this->header->has($name);

        return $present;
    }
111
112
    /**
     * Returns the header details; the call is delegated to Header::getDetails().
     *
     * @param bool $deep forwarded unchanged to the header
     *
     * @return array
     */
    public function getDetails($deep = true)
    {
        $details = $this->header->getDetails($deep);

        return $details;
    }
121
122
    /**
     * Returns the content given to the constructor, unmodified.
     *
     * @return string|null
     */
    public function getContent()
    {
        $content = $this->content;

        return $content;
    }
129
130
    /**
     * Masks the parts of a content stream that must not be mistaken for operators.
     *
     * The following spans are overwritten with the filler character, byte for byte:
     * escaped backslashes/parentheses, inline image blocks (BI ... ID ... EI),
     * the inside of [...] arrays and (...) strings, everything between '<' and '>',
     * and the BDC / EMC marked-content keywords. Because every replacement has the
     * same length as the text it replaces, offsets found in the returned string are
     * valid offsets in the original $content as well.
     *
     * @param string $content content stream to mask
     * @param string $char    filler; only its first byte is used, and 'X' is used
     *                        when an empty value is given
     *
     * @return string the masked copy of $content
     */
    public function cleanContent($content, $char = 'X')
    {
        // The filler must be exactly one byte, otherwise masked spans would change length.
        $char = (string) $char;
        $char = '' === $char ? 'X' : $char[0];

        // Escaped backslashes and parentheses: two bytes in, two bytes out.
        $content = str_replace(['\\\\', '\\)', '\\('], $char.$char, $content);

        // Remove image block with binary content (whole match, surrounding whitespace included).
        $content = $this->maskPatternMatches('/\s(BI\s.*?(\sID\s).*?(\sEI))\s/s', 0, $content, $char);

        // Clean content in square brackets [.....]
        $content = $this->maskPatternMatches('/\[((\(.*?\)|[0-9\.\-\s]*)*)\]/s', 1, $content, $char);

        // Clean content in round brackets (.....)
        $content = $this->maskPatternMatches('/\((.*?)\)/s', 1, $content, $char);

        // Clean structure
        $content = $this->maskAngleBracketSpans($content, $char);

        // Clean BDC and EMC markup. The filler is quoted with the pattern delimiter so
        // that a filler such as '/' cannot terminate the regular expression early.
        $content = $this->maskPatternMatches(
            '/(\/[A-Za-z0-9\_]*\s*'.preg_quote($char, '/').'*BDC)/s',
            1,
            $content,
            $char
        );
        $content = $this->maskPatternMatches('/\s(EMC)\s/s', 1, $content, $char);

        return $content;
    }

    /**
     * Overwrites one capture group of every match of $pattern with the filler character.
     *
     * @param string $pattern regular expression to search for
     * @param int    $group   index of the capture group to mask (0 = whole match)
     * @param string $content text to search and mask
     * @param string $char    one-byte filler
     *
     * @return string $content with the captured spans masked; unchanged when nothing
     *                matches or when the pattern could not be applied
     */
    private function maskPatternMatches($pattern, $group, $content, $char)
    {
        if (!preg_match_all($pattern, $content, $matches, PREG_OFFSET_CAPTURE)) {
            return $content;
        }

        // Replacements keep the length, so offsets captured up front stay valid.
        foreach ($matches[$group] as $part) {
            $length = \strlen($part[0]);
            $content = substr_replace($content, str_repeat($char, $length), $part[1], $length);
        }

        return $content;
    }

    /**
     * Masks every '<' ... '>' span, the brackets themselves included, tracking nesting depth.
     *
     * @param string $content text to mask
     * @param string $char    one-byte filler
     *
     * @return string the masked text
     */
    private function maskAngleBracketSpans($content, $char)
    {
        $parts = preg_split('/(<|>)/s', $content, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE);
        if (!$parts) {
            return $content;
        }

        $masked = '';
        $level = 0;
        foreach ($parts as $part) {
            // The opening bracket already belongs to the masked span ...
            if ('<' === $part) {
                ++$level;
            }

            $masked .= 0 === $level ? $part : str_repeat($char, \strlen($part));

            // ... and so does the closing one, hence the decrement after appending.
            if ('>' === $part) {
                --$level;
            }
        }

        return $masked;
    }
191
192
    /**
     * Cuts a content stream into the sections that are later parsed for commands.
     *
     * The content is padded with one space on each side and masked with
     * cleanContent() (filler '_'); matches are located in the masked copy and the
     * corresponding bytes are then taken from the padded original. All BT ... ET
     * text blocks are returned first, followed by all "/Name Do" commands.
     *
     * @param string $content
     *
     * @return array list of section strings
     */
    public function getSectionsText($content)
    {
        $content = ' '.$content.' ';
        $masked = $this->cleanContent($content, '_');
        $sections = [];

        // Extract text blocks.
        $found = preg_match_all('/\s+BT[\s|\(|\[]+(.*?)\s*ET/s', $masked, $matches, PREG_OFFSET_CAPTURE);
        if ($found) {
            foreach ($matches[1] as list($maskedText, $position)) {
                if ('' !== $maskedText) {
                    $raw = substr($content, $position, \strlen($maskedText));

                    // Removes BDC and EMC markup.
                    $sections[] = preg_replace(
                        '/(\/[A-Za-z0-9]+\s*<<.*?)(>>\s*BDC)(.*?)(EMC\s+)/s',
                        '${3}',
                        $raw.' '
                    );
                }
            }
        }

        // Extract 'do' commands.
        $found = preg_match_all('/(\/[A-Za-z0-9\.\-_]+\s+Do)\s/s', $masked, $matches, PREG_OFFSET_CAPTURE);
        if ($found) {
            foreach ($matches[1] as list($maskedText, $position)) {
                $sections[] = substr($content, $position, \strlen($maskedText));
            }
        }

        return $sections;
    }
233
234 8
    /**
     * Picks the font used until a Tf command selects another one.
     *
     * The fonts of the page (if a page is given) are listed before the fonts of the
     * document; the first entry of that combined list is returned. When the list is
     * empty a new Font bound to the document is returned.
     *
     * @param Page|null $page
     *
     * @return Font
     */
    private function getDefaultFont(Page $page = null)
    {
        $pageFonts = null !== $page ? $page->getFonts() : [];
        $candidates = array_merge($pageFonts, array_values($this->document->getFonts()));

        if (0 === \count($candidates)) {
            return new Font($this->document);
        }

        return reset($candidates);
    }
249
250
    /**
     * Extracts the text of this object by interpreting its text commands.
     *
     * The content is split with getSectionsText(), every section is parsed with
     * getCommandsText() and the resulting commands are interpreted one by one:
     * positioning operators add line breaks, spaces or tabs, Tf switches the current
     * font, the text-showing operators are decoded with the current font, and Do
     * appends the text of the referenced XObject. The object's id is kept on the
     * static recursion stack while it is processed so that an XObject referencing an
     * object that is already being processed is skipped.
     *
     * @param Page $page page used to resolve fonts and XObjects; without it Tf and Do
     *                   commands are ignored
     *
     * @return string the extracted text followed by one space
     *
     * @throws \Exception
     */
    public function getText(Page $page = null)
    {
        $text = '';
        $sections = $this->getSectionsText($this->content);
        $current_font = $this->getDefaultFont($page);

        $current_position_td = ['x' => false, 'y' => false];
        $current_position_tm = ['x' => false, 'y' => false];

        array_push(self::$recursionStack, $this->getUniqueId());

        // The stack is static: it must be unwound even when decoding or a nested
        // XObject throws, otherwise later calls would see a stale entry.
        try {
            foreach ($sections as $section) {
                $commands = $this->getCommandsText($section);

                foreach ($commands as $command) {
                    switch ($command[self::OPERATOR]) {
                        // move text current point
                        case 'Td':
                            $args = preg_split('/\s/s', $command[self::COMMAND]);
                            $y = array_pop($args);
                            $x = array_pop($args);
                            if (((float) $x <= 0) ||
                                (false !== $current_position_td['y'] && (float) $y < (float) ($current_position_td['y']))
                            ) {
                                // vertical offset
                                $text .= "\n";
                            } elseif (false !== $current_position_td['x'] && (float) $x > (float) (
                                    $current_position_td['x']
                                )
                            ) {
                                // horizontal offset
                                $text .= ' ';
                            }
                            $current_position_td = ['x' => $x, 'y' => $y];
                            break;

                        // move text current point and set leading
                        case 'TD':
                            $args = preg_split('/\s/s', $command[self::COMMAND]);
                            $y = array_pop($args);
                            $x = array_pop($args);
                            if ((float) $y < 0) {
                                $text .= "\n";
                            } elseif ((float) $x <= 0) {
                                $text .= ' ';
                            }
                            break;

                        // select font
                        case 'Tf':
                            list($id) = preg_split('/\s/s', $command[self::COMMAND]);
                            $id = trim($id, '/');
                            if (null !== $page) {
                                $new_font = $page->getFont($id);
                                // If an invalid font ID is given, do not update the font.
                                // This should theoretically never happen, as the PDF spec states for the Tf operator:
                                // "The specified font value shall match a resource name in the Font entry of the default resource dictionary"
                                // (https://www.adobe.com/content/dam/acom/en/devnet/pdf/pdfs/PDF32000_2008.pdf, page 435)
                                // But we want to make sure that malformed PDFs do not simply crash.
                                if (null !== $new_font) {
                                    $current_font = $new_font;
                                }
                            }
                            break;

                        // show a single string: wrap the command so it can be decoded like a TJ array
                        case "'":
                        case 'Tj':
                            $command[self::COMMAND] = [$command];
                            // no break
                        case 'TJ':
                            $sub_text = $current_font->decodeText($command[self::COMMAND]);
                            $text .= $sub_text;
                            break;

                        // set leading
                        case 'TL':
                            $text .= ' ';
                            break;

                        // set text matrix: a jump of more than 10 units adds a tab (x) or a line break (y)
                        case 'Tm':
                            $args = preg_split('/\s/s', $command[self::COMMAND]);
                            $y = array_pop($args);
                            $x = array_pop($args);
                            if (false !== $current_position_tm['x']) {
                                $delta = abs((float) $x - (float) ($current_position_tm['x']));
                                if ($delta > 10) {
                                    $text .= "\t";
                                }
                            }
                            if (false !== $current_position_tm['y']) {
                                $delta = abs((float) $y - (float) ($current_position_tm['y']));
                                if ($delta > 10) {
                                    $text .= "\n";
                                }
                            }
                            $current_position_tm = ['x' => $x, 'y' => $y];
                            break;

                        // set horizontal scaling
                        case 'Tz':
                            $text .= "\n";
                            break;

                        // move to start of next line
                        case 'T*':
                            $text .= "\n";
                            break;

                        // paint an XObject: append its text
                        case 'Do':
                            if (null !== $page) {
                                $args = preg_split('/\s/s', $command[self::COMMAND]);
                                $id = trim(array_pop($args), '/ ');
                                $xobject = $page->getXObject($id);

                                // @todo $xobject could be a ElementXRef object, which would then throw an error
                                if ($xobject instanceof self
                                    && !\in_array($xobject->getUniqueId(), self::$recursionStack, true)
                                ) {
                                    // Not a circular reference.
                                    $text .= $xobject->getText($page);
                                }
                            }
                            break;

                        // Recognised operators that do not contribute to the text.
                        case 'Tc': // set character spacing
                        case 'Ts': // set super/subscripting text rise
                        case 'Tw': // set word spacing
                        case 'Da':
                        case 'rg':
                        case 'RG':
                        case 're':
                        case 'co':
                        case 'cs':
                        case 'gs':
                        case 'en':
                        case 'sc':
                        case 'SC':
                        case 'g':
                        case 'G':
                        case 'V':
                        case 'vo':
                        case 'Vo':
                            break;

                        default:
                    }
                }
            }
        } finally {
            array_pop(self::$recursionStack);
        }

        return $text.' ';
    }
436
437
    /**
     * Extracts the text of this object as a list of decoded fragments.
     *
     * Works like getText() but only collects text: every text-showing command
     * (', Tj, TJ) adds one decoded string to the result, and every Do command adds
     * the text of the referenced XObject. Positioning operators are ignored.
     *
     * @param Page $page page used to resolve fonts and XObjects; without it Tf and Do
     *                   commands are ignored
     *
     * @return array list of strings
     *
     * @throws \Exception
     */
    public function getTextArray(Page $page = null)
    {
        $text = [];
        $sections = $this->getSectionsText($this->content);
        $current_font = new Font($this->document);

        foreach ($sections as $section) {
            $commands = $this->getCommandsText($section);

            foreach ($commands as $command) {
                switch ($command[self::OPERATOR]) {
                    // select font
                    case 'Tf':
                        if (null !== $page) {
                            list($id) = preg_split('/\s/s', $command[self::COMMAND]);
                            $id = trim($id, '/');
                            $new_font = $page->getFont($id);
                            // Same protection as in getText(): an unknown font id yields
                            // null, in which case the current font is kept so that the
                            // next decodeText() call does not fail on malformed PDFs.
                            if (null !== $new_font) {
                                $current_font = $new_font;
                            }
                        }
                        break;

                    // show a single string: wrap the command so it can be decoded like a TJ array
                    case "'":
                    case 'Tj':
                        $command[self::COMMAND] = [$command];
                        // no break
                    case 'TJ':
                        $sub_text = $current_font->decodeText($command[self::COMMAND]);
                        $text[] = $sub_text;
                        break;

                    // paint an XObject: append its text
                    case 'Do':
                        if (null !== $page) {
                            $args = preg_split('/\s/s', $command[self::COMMAND]);
                            $id = trim(array_pop($args), '/ ');
                            $xobject = $page->getXObject($id);
                            // Only objects of this class provide getText(); anything
                            // else (or a missing XObject) is skipped, as in getText().
                            if ($xobject instanceof self) {
                                $text[] = $xobject->getText($page);
                            }
                        }
                        break;

                    // Recognised operators that do not contribute to the text array.
                    case 'Tc': // set character spacing
                    case 'Td': // move text current point
                    case 'TD': // move text current point and set leading
                    case 'TL': // set leading
                    case 'Tm':
                    case 'Ts': // set super/subscripting text rise
                    case 'Tw': // set word spacing
                    case 'Tz': // set horizontal scaling
                    case 'T*': // move to start of next line
                    case 'Da':
                    case 'rg':
                    case 'RG':
                    case 're':
                    case 'co':
                    case 'cs':
                    case 'gs':
                    case 'en':
                    case 'sc':
                    case 'SC':
                    case 'g':
                    case 'G':
                    case 'V':
                    case 'vo':
                    case 'Vo':
                        break;

                    default:
                }
            }
        }

        return $text;
    }
563
564
    /**
     * Parses a section of a content stream into a list of commands.
     *
     * Parsing starts at $offset and stops at the end of the text or at the first
     * position where no command can be read (for instance at ']', '>' , ')' or
     * 'ET'). Each command is an array with the keys self::TYPE (first character of
     * the operand, 'n' for bare numbers, '' otherwise), self::OPERATOR and
     * self::COMMAND (the operand: a string, or a nested command list for arrays).
     *
     * @param string $text_part text to parse
     * @param int    $offset    start position; updated to the position reached,
     *                          which is how nested arrays report where they end
     *
     * @return array list of commands
     */
    public function getCommandsText($text_part, &$offset = 0)
    {
        $commands = $matches = [];
        $length = \strlen($text_part);

        while ($offset < $length) {
            $offset += strspn($text_part, "\x00\x09\x0a\x0c\x0d\x20", $offset);
            if ($offset >= $length) {
                // Only whitespace was left: nothing more to read.
                break;
            }
            $char = $text_part[$offset];

            $operator = '';
            $type = '';
            $command = false;

            switch ($char) {
                case '/':
                    // name operand, optionally followed by a number, then an operator
                    $type = $char;
                    $rest = substr($text_part, $offset);
                    if (preg_match('/^\/([A-Z0-9\._,\+]+\s+[0-9.\-]+)\s+([A-Z]+)\s*/si', $rest, $matches)
                        || preg_match('/^\/([A-Z0-9\._,\+]+)\s+([A-Z]+)\s*/si', $rest, $matches)
                    ) {
                        $operator = $matches[2];
                        $command = $matches[1];
                        $offset += \strlen($matches[0]);
                    }
                    break;

                case '[':
                case ']':
                    // array object
                    $type = $char;
                    ++$offset;
                    if ('[' == $char) {
                        // get elements; the nested call stops at the closing ']' and moves $offset past it
                        $command = $this->getCommandsText($text_part, $offset);

                        if (preg_match('/^\s*[A-Z]{1,2}\s*/si', substr($text_part, $offset), $matches)) {
                            $operator = trim($matches[0]);
                            $offset += \strlen($matches[0]);
                        }
                    }
                    // For ']' $command stays false, which ends the current (nested) list.
                    break;

                case '<':
                case '>':
                    // hexadecimal string
                    $type = $char;
                    ++$offset;
                    if ('<' == $char) {
                        $strpos = strpos($text_part, '>', $offset);
                        if (false === $strpos) {
                            // Unterminated string: take the remainder and stop at the end
                            // instead of jumping back to the start of the text.
                            $command = (string) substr($text_part, $offset);
                            $offset = $length;
                        } else {
                            $command = substr($text_part, $offset, ($strpos - $offset));
                            $offset = $strpos + 1;
                        }
                    }

                    if (preg_match('/^\s*[A-Z]{1,2}\s*/si', substr($text_part, $offset), $matches)) {
                        $operator = trim($matches[0]);
                        $offset += \strlen($matches[0]);
                    }
                    break;

                case '(':
                case ')':
                    // literal string
                    ++$offset;
                    $type = $char;
                    $strpos = $offset;
                    if ('(' == $char) {
                        $open_bracket = 1;
                        while ($open_bracket > 0) {
                            if (!isset($text_part[$strpos])) {
                                break;
                            }
                            $ch = $text_part[$strpos];
                            switch ($ch) {
                                case '\\':
                                    // REVERSE SOLIDUS (5Ch) (Backslash)
                                    // skip next character
                                    ++$strpos;
                                    break;

                                case '(':
                                    // LEFT PARENTHESIS (28h)
                                    ++$open_bracket;
                                    break;

                                case ')':
                                    // RIGHT PARENTHESIS (29h)
                                    --$open_bracket;
                                    break;
                            }
                            ++$strpos;
                        }
                        // Drop the closing parenthesis, but only if one was actually read;
                        // an unterminated string keeps all of its bytes.
                        $closing = $open_bracket > 0 ? 0 : 1;
                        $command = substr($text_part, $offset, ($strpos - $offset - $closing));
                        $offset = $strpos;

                        if (preg_match('/^\s*([A-Z\']{1,2})\s*/si', substr($text_part, $offset), $matches)) {
                            $operator = $matches[1];
                            $offset += \strlen($matches[0]);
                        }
                    }
                    break;

                default:
                    if ('ET' == substr($text_part, $offset, 2)) {
                        // end of text block: $command stays false and parsing stops
                        break;
                    }

                    $rest = substr($text_part, $offset);
                    if (preg_match(
                        '/^\s*(?P<data>([0-9\.\-]+\s*?)+)\s+(?P<id>[A-Z]{1,3})\s*/si',
                        $rest,
                        $matches
                    )
                    ) {
                        // numbers followed by an operator
                        $operator = trim($matches['id']);
                        $command = trim($matches['data']);
                        $offset += \strlen($matches[0]);
                    } elseif (preg_match('/^\s*([0-9\.\-]+\s*?)+\s*/si', $rest, $matches)) {
                        // bare numbers
                        $type = 'n';
                        $command = trim($matches[0]);
                        $offset += \strlen($matches[0]);
                    } elseif (preg_match('/^\s*([A-Z\*]+)\s*/si', $rest, $matches)) {
                        // operator without operand
                        $type = '';
                        $operator = $matches[1];
                        $command = '';
                        $offset += \strlen($matches[0]);
                    }
            }

            if (false !== $command) {
                $commands[] = [
                    self::TYPE => $type,
                    self::OPERATOR => $operator,
                    self::COMMAND => $command,
                ];
            } else {
                break;
            }
        }

        return $commands;
    }
721
722
    /**
     * Creates the object class matching the 'Type' (and 'Subtype') entries of the header.
     *
     * XObject yields an Image or a Form depending on the subtype, Pages, Page and
     * Encoding yield their own classes, and Font yields the class named
     * \Smalot\PdfParser\Font\Font<Subtype> when it exists, a plain Font otherwise.
     * Every other case yields a plain PDFObject.
     *
     * @param Document $document
     * @param Header   $header
     * @param string   $content
     *
     * @return PDFObject
     */
    public static function factory(Document $document, Header $header, $content)
    {
        $type = $header->get('Type')->getContent();

        switch ($type) {
            case 'XObject':
                $subtype = $header->get('Subtype')->getContent();
                if ('Image' == $subtype) {
                    return new Image($document, $header, $content);
                }
                if ('Form' == $subtype) {
                    return new Form($document, $header, $content);
                }

                return new self($document, $header, $content);

            case 'Pages':
                return new Pages($document, $header, $content);

            case 'Page':
                return new Page($document, $header, $content);

            case 'Encoding':
                return new Encoding($document, $header, $content);

            case 'Font':
                // The subtype selects a specialised font class when one is defined.
                $classname = '\Smalot\PdfParser\Font\Font'.$header->get('Subtype')->getContent();

                return class_exists($classname)
                    ? new $classname($document, $header, $content)
                    : new Font($document, $header, $content);
        }

        return new self($document, $header, $content);
    }
764
765
    /**
     * Returns the identifier of this instance as given by spl_object_hash().
     *
     * @return string
     */
    protected function getUniqueId()
    {
        $hash = spl_object_hash($this);

        return $hash;
    }
774
}
775