Passed
Pull Request — master (#318)
by
unknown
04:18
created

PDFObject::getMaxCharWidthFromFont()   B

Complexity

Conditions 9
Paths 13

Size

Total Lines 32
Code Lines 16

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 16
CRAP Score 9.0164

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 9
eloc 16
c 1
b 0
f 0
nc 13
nop 2
dl 0
loc 32
ccs 16
cts 17
cp 0.9412
crap 9.0164
rs 8.0555
1
<?php
2
3
/**
4
 * @file
5
 *          This file is part of the PdfParser library.
6
 *
7
 * @author  Sébastien MALOT <[email protected]>
8
 * @date    2017-01-03
9
 *
10
 * @license LGPLv3
11
 * @url     <https://github.com/smalot/pdfparser>
12
 *
13
 *  PdfParser is a pdf library written in PHP, extraction oriented.
14
 *  Copyright (C) 2017 - Sébastien MALOT <[email protected]>
15
 *
16
 *  This program is free software: you can redistribute it and/or modify
17
 *  it under the terms of the GNU Lesser General Public License as published by
18
 *  the Free Software Foundation, either version 3 of the License, or
19
 *  (at your option) any later version.
20
 *
21
 *  This program is distributed in the hope that it will be useful,
22
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
23
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
24
 *  GNU Lesser General Public License for more details.
25
 *
26
 *  You should have received a copy of the GNU Lesser General Public License
27
 *  along with this program.
28
 *  If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
29
 */
30
31
namespace Smalot\PdfParser;
32
33
use Smalot\PdfParser\XObject\Form;
34
use Smalot\PdfParser\XObject\Image;
35
36
/**
37
 * Class PDFObject
38
 */
39
class PDFObject
40
{
41
    const TYPE = 't';
42
43
    const OPERATOR = 'o';
44
45
    const COMMAND = 'c';
46
47
    /**
48
     * The recursion stack.
49
     *
50
     * @var array
51
     */
52
    public static $recursionStack = [];
53
54
    /**
55
     * @var Document
56
     */
57
    protected $document = null;
58
59
    /**
60
     * @var Header
61
     */
62
    protected $header = null;
63
64
    /**
65
     * @var string
66
     */
67
    protected $content = null;
68
69
    /**
     * Stores the owning document, the header dictionary and the raw content.
     *
     * @param Document    $document document this object belongs to
     * @param Header|null $header   header dictionary; a fresh empty Header is created when null
     * @param string|null $content  raw content of the object
     */
    public function __construct(Document $document, Header $header = null, $content = null)
    {
        // Guarantee a usable header so the delegating accessors never hit null.
        if (null === $header) {
            $header = new Header();
        }

        $this->document = $document;
        $this->header = $header;
        $this->content = $content;
    }
79
80 29
    /**
     * Initialisation hook; does nothing in this class.
     */
    public function init()
    {
        // Intentionally empty.
    }
83
84
    /**
     * Returns the header dictionary attached to this object.
     *
     * @return Header|null
     */
    public function getHeader()
    {
        $header = $this->header;

        return $header;
    }
91
92
    /**
     * Looks up a header entry by name; delegates to the header's get().
     *
     * @param string $name name of the header entry
     *
     * @return Element|PDFObject
     */
    public function get($name)
    {
        $entry = $this->header->get($name);

        return $entry;
    }
101
102
    /**
     * Tells whether the header holds an entry with the given name; delegates to the header's has().
     *
     * @param string $name name of the header entry
     *
     * @return bool
     */
    public function has($name)
    {
        $present = $this->header->has($name);

        return $present;
    }
111
112
    /**
     * Returns the header details; delegates to the header's getDetails().
     *
     * @param bool $deep forwarded unchanged to the header
     *
     * @return array
     */
    public function getDetails($deep = true)
    {
        $details = $this->header->getDetails($deep);

        return $details;
    }
121
122
    /**
     * Returns the raw content given at construction time.
     *
     * @return string|null
     */
    public function getContent()
    {
        $content = $this->content;

        return $content;
    }
129
130
    /**
     * Masks every part of a content stream that is not operator syntax.
     *
     * Escaped backslashes/parentheses, inline image blocks, the inside of
     * square and round brackets, everything between angle brackets, and
     * BDC/EMC markers are overwritten with the masking character. Every
     * replacement has the same byte length as the text it replaces, so
     * offsets found in the result are valid in the original string.
     *
     * @param string $content content stream to mask
     * @param string $char    masking character; only its first byte is used
     *
     * @return string masked copy of $content with the same byte length
     */
    public function cleanContent($content, $char = 'X')
    {
        $char = $char[0];
        $content = str_replace(['\\\\', '\\)', '\\('], $char.$char, $content);

        // Overwrites each [text, offset] span (as produced by PREG_OFFSET_CAPTURE)
        // with the same number of masking characters.
        $mask = static function ($subject, array $spans) use ($char) {
            foreach ($spans as $span) {
                $length = \strlen($span[0]);
                $subject = substr_replace($subject, str_repeat($char, $length), $span[1], $length);
            }

            return $subject;
        };

        // Remove image blocks with binary content (BI ... ID ... EI); the whole match is masked.
        preg_match_all('/\s(BI\s.*?(\sID\s).*?(\sEI))\s/s', $content, $matches, PREG_OFFSET_CAPTURE);
        $content = $mask($content, $matches[0]);

        // Clean content in square brackets [.....]
        preg_match_all('/\[((\(.*?\)|[0-9\.\-\s]*)*)\]/s', $content, $matches, PREG_OFFSET_CAPTURE);
        $content = $mask($content, $matches[1]);

        // Clean content in round brackets (.....)
        preg_match_all('/\((.*?)\)/s', $content, $matches, PREG_OFFSET_CAPTURE);
        $content = $mask($content, $matches[1]);

        // Clean structure: everything from an opening '<' up to its matching '>' is masked,
        // including the brackets themselves.
        if ($parts = preg_split('/(<|>)/s', $content, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE)) {
            $content = '';
            $level = 0;
            foreach ($parts as $part) {
                if ('<' == $part) {
                    ++$level;
                }

                $content .= (0 == $level ? $part : str_repeat($char, \strlen($part)));

                if ('>' == $part) {
                    --$level;
                }
            }
        }

        // Clean BDC and EMC markup. The masking character is quoted together with the
        // pattern delimiter so that a '/' mask cannot terminate the pattern early.
        preg_match_all(
            '/(\/[A-Za-z0-9\_]*\s*'.preg_quote($char, '/').'*BDC)/s',
            $content,
            $matches,
            PREG_OFFSET_CAPTURE
        );
        $content = $mask($content, $matches[1]);

        preg_match_all('/\s(EMC)\s/s', $content, $matches, PREG_OFFSET_CAPTURE);
        $content = $mask($content, $matches[1]);

        return $content;
    }
191
192
    /**
     * Splits a content stream into the sections relevant for text extraction.
     *
     * Sections are located on a masked copy of the stream (see cleanContent())
     * and then cut out of the unmasked stream at the same offsets. All BT ... ET
     * text blocks come first, followed by all "/Name Do" commands.
     *
     * @param string $content content stream
     *
     * @return array list of section strings
     */
    public function getSectionsText($content)
    {
        $content = ' '.$content.' ';
        $masked = $this->cleanContent($content, '_');
        $result = [];

        // Extract text blocks.
        $found = preg_match_all('/\s+BT[\s|\(|\[]+(.*?)\s*ET/s', $masked, $matches, PREG_OFFSET_CAPTURE);
        if ($found) {
            foreach ($matches[1] as $capture) {
                list($maskedText, $position) = $capture;
                if ('' !== $maskedText) {
                    $raw = substr($content, $position, \strlen($maskedText));

                    // Removes BDC and EMC markup, keeping only what lies between them.
                    $result[] = preg_replace(
                        '/(\/[A-Za-z0-9]+\s*<<.*?)(>>\s*BDC)(.*?)(EMC\s+)/s',
                        '${3}',
                        $raw.' '
                    );
                }
            }
        }

        // Extract 'do' commands.
        $found = preg_match_all('/(\/[A-Za-z0-9\.\-_]+\s+Do)\s/s', $masked, $matches, PREG_OFFSET_CAPTURE);
        if ($found) {
            foreach ($matches[1] as $capture) {
                list($maskedText, $position) = $capture;
                $result[] = substr($content, $position, \strlen($maskedText));
            }
        }

        return $result;
    }
233
234
    /**
     * Extracts the text drawn by this object's content stream.
     *
     * The stream is split into sections and commands; text-showing operators
     * are decoded with the current font, positioning operators are turned
     * into spaces, tabs or newlines, and `Do` recurses into referenced
     * XObjects. The object's id is kept on the static recursion stack while
     * it is being processed so that circular XObject references are skipped.
     *
     * @param Page|null $page page used to resolve font and XObject names; when null,
     *                        `Tf` keeps the current font and `Do` is ignored
     *
     * @return string extracted text followed by one trailing space
     *
     * @throws \Exception
     */
    public function getText(Page $page = null)
    {
        $text = '';
        $sections = $this->getSectionsText($this->content);
        $current_font = null;
        $current_font_size = 1;

        // Start with the first font found in the document, if any.
        foreach ($this->document->getObjects() as $obj) {
            if ($obj instanceof Font) {
                $current_font = $obj;
                break;
            }
        }

        if (null === $current_font) {
            $current_font = new Font($this->document);
        }

        $current_position_td = ['x' => false, 'y' => false];
        $current_position_tm = ['x' => false, 'y' => false];

        self::$recursionStack[] = $this->getUniqueId();

        // The stack is static: it must be unwound even when decoding throws,
        // otherwise later calls would wrongly treat this object as "in progress".
        try {
            foreach ($sections as $section) {
                $commands = $this->getCommandsText($section);

                foreach ($commands as $command) {
                    switch ($command[self::OPERATOR]) {
                        // move text current point
                        case 'Td':
                            $args = preg_split('/\s/s', $command[self::COMMAND]);
                            $y = array_pop($args);
                            $x = array_pop($args);

                            /*
                             * When converting pdf into text some offsets between text should at least be output as spaces.
                             * But small move-text operations may be normal, see https://github.com/smalot/pdfparser/issues/201
                             *
                             * To find out what we are dealing with, get the max char width for the current font
                             * and check whether the offset is bigger than that.
                             */
                            $fontMaxWidth = static::getMaxCharWidthFromFont($current_font, $current_font_size);

                            $looksLikeCarriageReturn = (float) ($x) <= 0;

                            $movingVertically = false !== $current_position_td['y']
                                && (float) ($y) < (float) ($current_position_td['y']);

                            $lookLikeSpaceChar = ((float) ($x) - (float) ($current_position_td['x'])) > $fontMaxWidth;

                            if ($looksLikeCarriageReturn || $movingVertically) {
                                // vertical offset
                                $text .= "\n";
                            } elseif (false !== $current_position_td['x'] && $lookLikeSpaceChar) {
                                // horizontal offset
                                $text .= ' ';
                            }

                            $current_position_td = ['x' => $x, 'y' => $y];
                            break;

                        // move text current point and set leading
                        case 'TD':
                            $args = preg_split('/\s/s', $command[self::COMMAND]);
                            $y = array_pop($args);
                            $x = array_pop($args);
                            if ((float) $y < 0) {
                                $text .= "\n";
                            } elseif ((float) $x <= 0) {
                                $text .= ' ';
                            }
                            break;

                        // select font and size
                        case 'Tf':
                            list($id, $size) = preg_split('/\s/s', $command[self::COMMAND]);
                            $current_font_size = (float) $size;
                            $id = trim($id, '/');
                            if (null !== $page) {
                                $current_font = $page->getFont($id);
                            }
                            break;

                        case "'":
                        case 'Tj':
                            // Wrap the single string so it can be decoded like a TJ array.
                            $command[self::COMMAND] = [$command];
                            // no break
                        case 'TJ':
                            // No font available (the page lookup may have returned null):
                            // fall back to the raw, undecoded string.
                            if (null === $current_font) {
                                // TODO : Improve
                                $text .= $command[self::COMMAND][0][self::COMMAND];
                                break;
                            }

                            $text .= $current_font->decodeText($command[self::COMMAND]);
                            break;

                        // set leading
                        case 'TL':
                            $text .= ' ';
                            break;

                        // set text matrix: a jump of more than 10 units yields a tab (x) and/or a newline (y)
                        case 'Tm':
                            $args = preg_split('/\s/s', $command[self::COMMAND]);
                            $y = array_pop($args);
                            $x = array_pop($args);
                            if (false !== $current_position_tm['x']) {
                                $delta = abs((float) $x - (float) ($current_position_tm['x']));
                                if ($delta > 10) {
                                    $text .= "\t";
                                }
                            }
                            if (false !== $current_position_tm['y']) {
                                $delta = abs((float) $y - (float) ($current_position_tm['y']));
                                if ($delta > 10) {
                                    $text .= "\n";
                                }
                            }
                            $current_position_tm = ['x' => $x, 'y' => $y];
                            break;

                        // set horizontal scaling
                        case 'Tz':
                        // move to start of next line
                        case 'T*':
                            $text .= "\n";
                            break;

                        // invoke a named XObject
                        case 'Do':
                            if (null !== $page) {
                                $args = preg_split('/\s/s', $command[self::COMMAND]);
                                $id = trim(array_pop($args), '/ ');
                                $xobject = $page->getXObject($id);

                                // $xobject may be something other than a PDFObject (e.g. an unresolved
                                // reference); only recurse into real objects that are not already being
                                // processed. The comparison is strict: object hashes are strings and must
                                // not be compared numerically.
                                if ($xobject instanceof self
                                    && !\in_array($xobject->getUniqueId(), self::$recursionStack, true)
                                ) {
                                    // Not a circular reference.
                                    $text .= $xobject->getText($page);
                                }
                            }
                            break;

                        // Known operators that do not contribute to the text output.
                        case 'Tc': // set character spacing
                        case 'Ts': // set super/subscripting text rise
                        case 'Tw': // set word spacing
                        case 'Da':
                        case 'rg':
                        case 'RG':
                        case 're':
                        case 'co':
                        case 'cs':
                        case 'gs':
                        case 'en':
                        case 'sc':
                        case 'SC':
                        case 'g':
                        case 'G':
                        case 'V':
                        case 'vo':
                        case 'Vo':
                            break;

                        default:
                    }
                }
            }
        } finally {
            array_pop(self::$recursionStack);
        }

        return $text.' ';
    }
446
447
    /**
     * Extracts the text of this object's content stream as a list of fragments.
     *
     * Each text-showing operator (', Tj, TJ) contributes one decoded string and
     * each resolvable `Do` contributes the full text of the referenced XObject.
     * Positioning operators are ignored.
     *
     * @param Page|null $page page used to resolve font and XObject names; when null,
     *                        `Tf` keeps the current font and `Do` is ignored
     *
     * @return array list of text fragments
     *
     * @throws \Exception
     */
    public function getTextArray(Page $page = null)
    {
        $text = [];
        $sections = $this->getSectionsText($this->content);
        $current_font = new Font($this->document);

        foreach ($sections as $section) {
            $commands = $this->getCommandsText($section);

            foreach ($commands as $command) {
                switch ($command[self::OPERATOR]) {
                    // select font
                    case 'Tf':
                        // Without a page there is nothing to resolve the font name against;
                        // keep the current font instead of calling a method on null.
                        if (null !== $page) {
                            list($id) = preg_split('/\s/s', $command[self::COMMAND]);
                            $id = trim($id, '/');
                            $current_font = $page->getFont($id);
                        }
                        break;

                    case "'":
                    case 'Tj':
                        // Wrap the single string so it can be decoded like a TJ array.
                        $command[self::COMMAND] = [$command];
                        // no break
                    case 'TJ':
                        // No font available (the page lookup may have returned null):
                        // fall back to the raw, undecoded string.
                        if (null === $current_font) {
                            // TODO : Improve
                            $text[] = $command[self::COMMAND][0][self::COMMAND];
                            break;
                        }

                        $text[] = $current_font->decodeText($command[self::COMMAND]);
                        break;

                    // invoke a named XObject
                    case 'Do':
                        if (null !== $page) {
                            $args = preg_split('/\s/s', $command[self::COMMAND]);
                            $id = trim(array_pop($args), '/ ');
                            $xobject = $page->getXObject($id);

                            // Same guard as getText(): only recurse into real PDFObjects
                            // that are not already being processed.
                            if ($xobject instanceof self
                                && !\in_array($xobject->getUniqueId(), self::$recursionStack, true)
                            ) {
                                $text[] = $xobject->getText($page);
                            }
                        }
                        break;

                    // Known operators that do not contribute to the text output.
                    case 'Tc': // set character spacing
                    case 'Td': // move text current point
                    case 'TD': // move text current point and set leading
                    case 'TL': // set leading
                    case 'Tm':
                    case 'Ts': // set super/subscripting text rise
                    case 'Tw': // set word spacing
                    case 'Tz': // set horizontal scaling
                    case 'T*': // move to start of next line
                    case 'Da':
                    case 'rg':
                    case 'RG':
                    case 're':
                    case 'co':
                    case 'cs':
                    case 'gs':
                    case 'en':
                    case 'sc':
                    case 'SC':
                    case 'g':
                    case 'G':
                    case 'V':
                    case 'vo':
                    case 'Vo':
                        break;

                    default:
                }
            }
        }

        return $text;
    }
579
580
    /**
     * Tokenises a content-stream fragment into a list of commands.
     *
     * Each command is an array with three keys: self::TYPE (the leading
     * character for delimited operands, 'n' for bare numbers, '' otherwise),
     * self::OPERATOR (the operator following the operand, possibly '') and
     * self::COMMAND (the operand text, or a nested command list for `[...]`).
     * Parsing stops at the first position that cannot be interpreted.
     *
     * @param string $text_part fragment to tokenise
     * @param int    $offset    byte offset to start from; passed by reference and advanced
     *                          past everything that was consumed (used for `[...]` recursion)
     *
     * @return array list of commands
     */
    public function getCommandsText($text_part, &$offset = 0)
    {
        $commands = $matches = [];
        $length = \strlen($text_part);

        while ($offset < $length) {
            $offset += strspn($text_part, "\x00\x09\x0a\x0c\x0d\x20", $offset);
            if ($offset >= $length) {
                // Only whitespace was left; reading $text_part[$offset] would be out of range.
                break;
            }
            $char = $text_part[$offset];

            $operator = '';
            $type = '';
            $command = false;

            switch ($char) {
                case '/':
                    // name operand, optionally followed by a number, then an operator
                    $type = $char;
                    if (preg_match(
                        '/^\/([A-Z0-9\._,\+]+\s+[0-9.\-]+)\s+([A-Z]+)\s*/si',
                        substr($text_part, $offset),
                        $matches
                    )
                    ) {
                        $operator = $matches[2];
                        $command = $matches[1];
                        $offset += \strlen($matches[0]);
                    } elseif (preg_match(
                        '/^\/([A-Z0-9\._,\+]+)\s+([A-Z]+)\s*/si',
                        substr($text_part, $offset),
                        $matches
                    )
                    ) {
                        $operator = $matches[2];
                        $command = $matches[1];
                        $offset += \strlen($matches[0]);
                    }
                    break;

                case '[':
                case ']':
                    // array object
                    $type = $char;
                    ++$offset;
                    if ('[' == $char) {
                        // get elements; the nested call returns when it reaches the closing ']'
                        $command = $this->getCommandsText($text_part, $offset);

                        if (preg_match('/^\s*[A-Z]{1,2}\s*/si', substr($text_part, $offset), $matches)) {
                            $operator = trim($matches[0]);
                            $offset += \strlen($matches[0]);
                        }
                    }
                    // For ']' no command is produced, which ends the current (nested) loop below.
                    break;

                case '<':
                case '>':
                    // angle-bracket delimited operand
                    $type = $char;
                    ++$offset;
                    if ('<' == $char) {
                        $strpos = strpos($text_part, '>', $offset);
                        if (false === $strpos) {
                            // Unterminated operand: take the remainder and stop at the end.
                            // Using the false result arithmetically would rewind $offset to 1
                            // and could loop forever.
                            $command = (string) substr($text_part, $offset);
                            $offset = $length;
                        } else {
                            $command = substr($text_part, $offset, ($strpos - $offset));
                            $offset = $strpos + 1;
                        }
                    }

                    if (preg_match('/^\s*[A-Z]{1,2}\s*/si', substr($text_part, $offset), $matches)) {
                        $operator = trim($matches[0]);
                        $offset += \strlen($matches[0]);
                    }
                    break;

                case '(':
                case ')':
                    ++$offset;
                    $type = $char;
                    $strpos = $offset;
                    if ('(' == $char) {
                        // Scan to the matching ')' while honouring nesting and escapes.
                        $open_bracket = 1;
                        while ($open_bracket > 0) {
                            if (!isset($text_part[$strpos])) {
                                break;
                            }
                            $ch = $text_part[$strpos];
                            switch ($ch) {
                                case '\\':
                                    // REVERSE SOLIDUS (5Ch) (Backslash)
                                    // skip next character
                                    ++$strpos;
                                    break;

                                case '(':
                                    // LEFT PARENTHESIS (28h)
                                    ++$open_bracket;
                                    break;

                                case ')':
                                    // RIGHT PARENTHESIS (29h)
                                    --$open_bracket;
                                    break;
                            }
                            ++$strpos;
                        }
                        $command = substr($text_part, $offset, ($strpos - $offset - 1));
                        $offset = $strpos;

                        if (preg_match('/^\s*([A-Z\']{1,2})\s*/si', substr($text_part, $offset), $matches)) {
                            $operator = $matches[1];
                            $offset += \strlen($matches[0]);
                        }
                    }
                    break;

                default:
                    if ('ET' == substr($text_part, $offset, 2)) {
                        break;
                    } elseif (preg_match(
                        '/^\s*(?P<data>([0-9\.\-]+\s*?)+)\s+(?P<id>[A-Z]{1,3})\s*/si',
                        substr($text_part, $offset),
                        $matches
                    )
                    ) {
                        // numeric operands followed by an operator
                        $operator = trim($matches['id']);
                        $command = trim($matches['data']);
                        $offset += \strlen($matches[0]);
                    } elseif (preg_match('/^\s*([0-9\.\-]+\s*?)+\s*/si', substr($text_part, $offset), $matches)) {
                        // bare numbers without operator
                        $type = 'n';
                        $command = trim($matches[0]);
                        $offset += \strlen($matches[0]);
                    } elseif (preg_match('/^\s*([A-Z\*]+)\s*/si', substr($text_part, $offset), $matches)) {
                        // operator without operands
                        $type = '';
                        $operator = $matches[1];
                        $command = '';
                        $offset += \strlen($matches[0]);
                    }
            }

            if (false === $command) {
                // Nothing recognised at this position: stop parsing.
                break;
            }

            $commands[] = [
                self::TYPE => $type,
                self::OPERATOR => $operator,
                self::COMMAND => $command,
            ];
        }

        return $commands;
    }
737
738
    /**
     * Builds the object class matching the header's Type (and Subtype) entries.
     *
     * XObject becomes Image or Form depending on the Subtype, Pages, Page and
     * Encoding map to their own classes, Font maps to
     * \Smalot\PdfParser\Font\Font<Subtype> when that class exists and to Font
     * otherwise. Anything else yields a plain PDFObject.
     *
     * @param Document $document document the new object belongs to
     * @param Header   $header   header dictionary of the object
     * @param string   $content  raw content of the object
     *
     * @return PDFObject
     */
    public static function factory(Document $document, Header $header, $content)
    {
        $type = $header->get('Type')->getContent();

        // Loose comparisons are kept on purpose: they mirror switch semantics.
        if ('XObject' == $type) {
            $subtype = $header->get('Subtype')->getContent();

            if ('Image' == $subtype) {
                return new Image($document, $header, $content);
            }

            if ('Form' == $subtype) {
                return new Form($document, $header, $content);
            }

            return new self($document, $header, $content);
        }

        if ('Pages' == $type) {
            return new Pages($document, $header, $content);
        }

        if ('Page' == $type) {
            return new Page($document, $header, $content);
        }

        if ('Encoding' == $type) {
            return new Encoding($document, $header, $content);
        }

        if ('Font' == $type) {
            $fontClass = '\Smalot\PdfParser\Font\Font'.$header->get('Subtype')->getContent();

            return class_exists($fontClass)
                ? new $fontClass($document, $header, $content)
                : new Font($document, $header, $content);
        }

        return new self($document, $header, $content);
    }
780
781
    /**
     * Returns an id identifying this object instance (its spl_object_hash).
     *
     * @return string
     */
    protected function getUniqueId()
    {
        $hash = spl_object_hash($this);

        return $hash;
    }
790
791
    /**
     * Returns the width of the widest character of a font, scaled to the font size.
     *
     * Widths are read from the font details in thousandths of a unit: the
     * largest entry of `Widths` if present, otherwise `DW` for a dictionary
     * whose `Type` is "CIDFontType2" (or the font size itself when `DW` is
     * missing or empty). For fonts with `DescendantFonts` the first descendant
     * is inspected. Returns 0 when no font is given or no width can be found.
     *
     * @param Font|null $currentFont     font to inspect
     * @param float     $currentFontSize current font size
     *
     * @return float|int
     */
    protected static function getMaxCharWidthFromFont($currentFont, $currentFontSize)
    {
        if (null === $currentFont) {
            return 0;
        }

        $fontDictionary = $currentFont->getDetails();

        // type 0: the metrics live on the first descendant font, which may be absent
        if (isset($fontDictionary['DescendantFonts'])) {
            $fontDictionary = isset($fontDictionary['DescendantFonts'][0])
                ? $fontDictionary['DescendantFonts'][0]
                : [];
        }

        // Malformed details: nothing to look widths up in.
        if (!\is_array($fontDictionary)) {
            return 0;
        }

        // type 1
        if (isset($fontDictionary['Widths'])) {
            $fontMaxWidth = 0;
            foreach ($fontDictionary['Widths'] as $width) {
                if (($floatWidth = (float) $width) > $fontMaxWidth) {
                    $fontMaxWidth = $floatWidth;
                }
            }

            return ($fontMaxWidth / 1000) * $currentFontSize;
        }

        // CIDFontType2; the 'Type' entry may be missing or not a string
        if (isset($fontDictionary['Type'])
            && \is_string($fontDictionary['Type'])
            && 'cidfonttype2' === strtolower($fontDictionary['Type'])
        ) {
            if (!empty($fontDictionary['DW'])) {
                // defaultWidth / maxWidth for CIDFonts
                return ((float) $fontDictionary['DW'] / 1000) * $currentFontSize;
            }

            return $currentFontSize;
        }

        return 0;
    }
832
}
833