Test Failed
Pull Request — master (#500)
by
unknown
01:54
created

Font::decodeContent()   A

Complexity

Conditions 4
Paths 4

Size

Total Lines 15
Code Lines 7

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 8
CRAP Score 4

Importance

Changes 7
Bugs 0 Features 1
Metric Value
cc 4
eloc 7
c 7
b 0
f 1
nc 4
nop 2
dl 0
loc 15
ccs 8
cts 8
cp 1
crap 4
rs 10
1
<?php
2
3
/**
4
 * @file
5
 *          This file is part of the PdfParser library.
6
 *
7
 * @author  Sébastien MALOT <[email protected]>
8
 * @date    2017-01-03
9
 *
10
 * @license LGPLv3
11
 * @url     <https://github.com/smalot/pdfparser>
12
 *
13
 *  PdfParser is a pdf library written in PHP, extraction oriented.
14
 *  Copyright (C) 2017 - Sébastien MALOT <[email protected]>
15
 *
16
 *  This program is free software: you can redistribute it and/or modify
17
 *  it under the terms of the GNU Lesser General Public License as published by
18
 *  the Free Software Foundation, either version 3 of the License, or
19
 *  (at your option) any later version.
20
 *
21
 *  This program is distributed in the hope that it will be useful,
22
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
23
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
24
 *  GNU Lesser General Public License for more details.
25
 *
26
 *  You should have received a copy of the GNU Lesser General Public License
27
 *  along with this program.
28
 *  If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
29
 */
30
31
namespace Smalot\PdfParser;
32
33
use LogicException;
34
use Smalot\PdfParser\Encoding\WinAnsiEncoding;
35
use Smalot\PdfParser\Exception\EncodingNotFoundException;
36
37
/**
38
 * Class Font
39
 */
40
class Font extends PDFObject
41
{
42
    const MISSING = '?';
43
44
    /**
45
     * @var array
46
     */
47
    protected $table = null;
48
49
    /**
50
     * @var array
51
     */
52
    protected $tableSizes = null;
53
54
    /**
55
     * Caches results from uchr.
56
     *
57
     * @var array
58
     */
59
    private static $uchrCache = [];
60
61
    /**
62
     * In some PDF files (@see https://github.com/smalot/pdfparser/pull/500) an encoding can be referenced by object id
63
     * but the object itself does not contain `/Type /Encoding` in its dictionary. Such objects are not initialized as
64
     * Encoding in \Smalot\PdfParser\PDFObject::factory() during file parsing (they would be just PDFObject).
65
     *
66
     * Therefore, we create Encoding instance from them during decoding and cache this value in this property.
67
     *
68
     * @var Encoding
69
     */
70
    private $initializedEncodingByPdfObject;
71
72 36
    /**
     * Initialise the font by loading its character translation table.
     */
    public function init()
    {
        // Populate the code => text lookup table used when decoding.
        $this->loadTranslateTable();
    }
77
78 2
    /**
     * Returns the font's `BaseFont` entry as a string, or "[Unknown]" when the entry is absent.
     */
    public function getName(): string
    {
        if (!$this->has('BaseFont')) {
            return '[Unknown]';
        }

        return (string) $this->get('BaseFont');
    }
82
83 2
    /**
     * Returns the `Subtype` entry of the font header, cast to string.
     */
    public function getType(): string
    {
        $subtype = $this->header->get('Subtype');

        return (string) $subtype;
    }
87
88 1
    /**
     * Returns the font details: name, type and encoding, completed by the parent's details.
     *
     * The font's own `Name`, `Type` and `Encoding` keys take precedence over identically
     * named keys returned by the parent implementation. `Encoding` defaults to "Ansi"
     * when the font has no `Encoding` entry.
     *
     * @param bool $deep forwarded unchanged to the parent implementation
     */
    public function getDetails(bool $deep = true): array
    {
        $own = [
            'Name' => $this->getName(),
            'Type' => $this->getType(),
            'Encoding' => ($this->has('Encoding') ? (string) $this->get('Encoding') : 'Ansi'),
        ];

        // Array union: keys already present in $own are kept as-is.
        return $own + parent::getDetails($deep);
    }
100
101
    /**
     * Translate one character code into text using the font's lookup table.
     *
     * When the code is not in the table the result depends on $use_default:
     * self::MISSING if it is true; otherwise the raw input, or the Unicode character
     * for the code when the input is a single byte and the font's Encoding resolves
     * to WinAnsiEncoding.
     *
     * @param string $char        raw bytes of the character code
     * @param bool   $use_default whether unknown codes are reported as self::MISSING
     *
     * @return string|bool
     */
    public function translateChar(string $char, bool $use_default = true)
    {
        $code = hexdec(bin2hex($char));

        if (\array_key_exists($code, $this->table)) {
            return $this->table[$code];
        }

        // Fallback for decoding single-byte ANSI characters that are not in the lookup table.
        $fallback = $char;
        $isSingleByte = \strlen($char) < 2;

        if ($isSingleByte && $this->has('Encoding') && $this->get('Encoding') instanceof Encoding) {
            try {
                $encodingClass = $this->get('Encoding')->__toString();

                if (WinAnsiEncoding::class === $encodingClass) {
                    $fallback = self::uchr($code);
                }
            } catch (EncodingNotFoundException $e) {
                // Encoding->getEncodingClass() throws EncodingNotFoundException when BaseEncoding doesn't exist.
                // See table 5.11 of the PDF 1.5 specification for more info.
            }
        }

        if ($use_default) {
            return self::MISSING;
        }

        return $fallback;
    }
131
132 34
    /**
     * Convert a numeric character code into its UTF-8 string, memoising the result.
     *
     * @param int $code numeric character code
     */
    public static function uchr(int $code): string
    {
        if (isset(self::$uchrCache[$code])) {
            return self::$uchrCache[$code];
        }

        // html_entity_decode() will not work with UTF-16 or UTF-32 char entities,
        // therefore, we use mb_convert_encoding() instead
        $converted = mb_convert_encoding("&#{$code};", 'UTF-8', 'HTML-ENTITIES');
        self::$uchrCache[$code] = $converted;

        return self::$uchrCache[$code];
    }
142
143 36
    /**
     * Build (once) and return the table that maps character codes to decoded text.
     *
     * The table is filled from the font's `ToUnicode` content when that entry exists:
     * `codespacerange` sections set the code sizes, `bfchar` sections add single
     * mappings and `bfrange` sections add range mappings. Without a `ToUnicode` entry
     * the table stays empty. Later calls return the table built by the first call.
     *
     * @return array character code => decoded text
     */
    public function loadTranslateTable(): array
    {
        if (null !== $this->table) {
            return $this->table;
        }

        $this->table = [];
        $this->tableSizes = ['from' => 1, 'to' => 1];

        if (!$this->has('ToUnicode')) {
            return $this->table;
        }

        $cmap = $this->get('ToUnicode')->getContent();

        // Code sizes: only the first codespacerange section is taken into account.
        $found = [];
        if (preg_match_all('/begincodespacerange(?P<sections>.*?)endcodespacerange/s', $cmap, $found)) {
            $pairs = [];
            preg_match_all('/<(?P<from>[0-9A-F]+)> *<(?P<to>[0-9A-F]+)>[ \r\n]+/is', $found['sections'][0], $pairs);

            // Two hex digits per byte; never less than one byte.
            $this->tableSizes = [
                'from' => max(1, \strlen(current($pairs['from'])) / 2),
                'to' => max(1, \strlen(current($pairs['to'])) / 2),
            ];
        }

        // Single mappings: every bfchar section is processed.
        $found = [];
        if (preg_match_all('/beginbfchar(?P<sections>.*?)endbfchar/s', $cmap, $found)) {
            foreach ($found['sections'] as $section) {
                $pairs = [];
                preg_match_all('/<(?P<from>[0-9A-F]+)> +<(?P<to>[0-9A-F]+)>[ \r\n]+/is', $section, $pairs);

                // The source code size is taken from the first entry of the section.
                $this->tableSizes['from'] = max(1, \strlen(current($pairs['from'])) / 2);

                foreach ($pairs['from'] as $index => $source) {
                    $text = self::hexGroupsToText($pairs['to'][$index]);
                    $this->table[hexdec($source)] = $text;
                }
            }
        }

        // Range mappings: every bfrange section is processed.
        $found = [];
        if (preg_match_all('/beginbfrange(?P<sections>.*?)endbfrange/s', $cmap, $found)) {
            foreach ($found['sections'] as $section) {
                // Form: <srcCode1> <srcCode2> <dstString>
                $ranges = [];
                preg_match_all('/<(?P<from>[0-9A-F]+)> *<(?P<to>[0-9A-F]+)> *<(?P<offset>[0-9A-F]+)>[ \r\n]+/is', $section, $ranges);

                foreach ($ranges['from'] as $index => $source) {
                    $first = hexdec($source);
                    $last = hexdec($ranges['to'][$index]);
                    $base = hexdec($ranges['offset'][$index]);

                    // Consecutive source codes map to consecutive destination codes.
                    for ($code = $first; $code <= $last; ++$code) {
                        $this->table[$code] = self::uchr($code - $first + $base);
                    }
                }

                // Form: <srcCode1> <srcCodeN> [<dstString1> <dstString2> ... <dstStringN>]
                // Some PDF files put the 2-byte Unicode values on new lines, hence \r\n in the pattern.
                $lists = [];
                preg_match_all('/<(?P<from>[0-9A-F]+)> *<(?P<to>[0-9A-F]+)> *\[(?P<strings>[\r\n<>0-9A-F ]+)\][ \r\n]+/is', $section, $lists);

                foreach ($lists['from'] as $index => $source) {
                    $first = hexdec($source);
                    $items = [];

                    preg_match_all('/<(?P<string>[0-9A-F]+)> */is', $lists['strings'][$index], $items);

                    // The n-th destination string belongs to source code $first + n.
                    foreach ($items['string'] as $position => $hex) {
                        $text = self::hexGroupsToText($hex);
                        $this->table[$first + $position] = $text;
                    }
                }
            }
        }

        return $this->table;
    }

    /**
     * Convert a hexadecimal destination string into text.
     *
     * The string is cut into 4-hex-digit groups (any shorter leftover piece is kept
     * as its own group) and each group is converted with uchr().
     */
    private static function hexGroupsToText(string $hex): string
    {
        $groups = preg_split(
            '/([0-9A-F]{4})/i',
            $hex,
            0,
            \PREG_SPLIT_NO_EMPTY | \PREG_SPLIT_DELIM_CAPTURE
        );

        $text = '';
        foreach ($groups as $group) {
            $text .= self::uchr(hexdec($group));
        }

        return $text;
    }
250
251 1
    /**
     * Replace the character translation table with the given one.
     *
     * @param array $table character code => decoded text
     */
    public function setTable(array $table)
    {
        $this->table = $table;
    }
255
256 39
    /**
     * Replace every `<hex digits>` token of the input by the bytes it encodes.
     *
     * Input containing `<?xml` is returned untouched. Text outside the tokens is copied
     * as-is.
     *
     * @param string $hexa       text that may contain `<...>` hexadecimal tokens
     * @param bool   $add_braces when true, each decoded token is wrapped in parentheses
     *                           and its backslashes are doubled
     */
    public static function decodeHexadecimal(string $hexa, bool $add_braces = false): string
    {
        // Special shortcut for XML content.
        if (false !== stripos($hexa, '<?xml')) {
            return $hexa;
        }

        $chunks = preg_split('/(<[a-f0-9]+>)/si', $hexa, -1, \PREG_SPLIT_NO_EMPTY | \PREG_SPLIT_DELIM_CAPTURE);
        $decoded = '';

        foreach ($chunks as $chunk) {
            $isToken = preg_match('/^<.*>$/s', $chunk) && false === stripos($chunk, '<?xml');

            if (!$isToken) {
                $decoded .= $chunk;
                continue;
            }

            // Strip line breaks and the surrounding angle brackets, then unpack the hex digits.
            $digits = trim(preg_replace("/[\r\n]/", '', $chunk), '<>');
            $bytes = pack('H*', $digits);

            if ($add_braces) {
                $decoded .= '('.preg_replace('/\\\/s', '\\\\\\', $bytes).')';
            } else {
                $decoded .= $bytes;
            }
        }

        return $decoded;
    }
288
289 39
    /**
     * Replace every backslash followed by three octal digits by the byte it denotes.
     *
     * All other text is copied unchanged.
     */
    public static function decodeOctal(string $text): string
    {
        $pieces = preg_split('/(\\\\[0-7]{3})/s', $text, -1, \PREG_SPLIT_NO_EMPTY | \PREG_SPLIT_DELIM_CAPTURE);
        $decoded = '';

        foreach ($pieces as $piece) {
            if (!preg_match('/^\\\\[0-7]{3}$/', $piece)) {
                $decoded .= $piece;
                continue;
            }

            // Drop the backslash and turn the octal digits into a single byte.
            $decoded .= \chr(octdec(trim($piece, '\\')));
        }

        return $decoded;
    }
304
305 53
    /**
     * Decode `#xx` escapes (a number sign followed by two hexadecimal digits) into bytes.
     *
     * The two digits are read as a hexadecimal byte value, so both `#20` and `#2F`
     * (upper- or lower-case) are decoded. A `#` that is not followed by two hexadecimal
     * digits is left untouched.
     *
     * @param string $text text that may contain `#xx` escapes
     *
     * @return string the text with every escape replaced by its byte
     */
    public static function decodeEntities(string $text): string
    {
        $decoded = preg_replace_callback(
            '/#([0-9A-Fa-f]{2})/',
            static function (array $match): string {
                return \chr((int) hexdec($match[1]));
            },
            $text
        );

        // preg_replace_callback() yields null on a PCRE error; keep the input in that case.
        return $decoded ?? $text;
    }
320
321 39
    /**
     * Decode a big-endian UTF-16 string introduced by the byte order marker FE FF.
     *
     * Text without that marker is returned unchanged. Otherwise the marker is removed
     * and the remaining bytes are read two at a time; a high surrogate immediately
     * followed by a low surrogate is combined into one code point so that characters
     * above U+FFFF are decoded as a single character.
     *
     * @param string $text raw text, possibly starting with the FE FF marker
     *
     * @return string the decoded text, or the input when no marker is present
     */
    public static function decodeUnicode(string $text): string
    {
        if (!preg_match('/^\xFE\xFF/i', $text)) {
            return $text;
        }

        // Strip U+FEFF byte order marker.
        $decode = substr($text, 2);
        $length = \strlen($decode);
        $text = '';

        for ($i = 0; $i < $length; $i += 2) {
            $code = (int) hexdec(bin2hex(substr($decode, $i, 2)));

            // High surrogate with a complete 2-byte unit after it: try to form a pair.
            if ($code >= 0xD800 && $code <= 0xDBFF && $i + 3 < $length) {
                $low = (int) hexdec(bin2hex(substr($decode, $i + 2, 2)));

                if ($low >= 0xDC00 && $low <= 0xDFFF) {
                    $code = 0x10000 + (($code - 0xD800) << 10) + ($low - 0xDC00);
                    // The low surrogate has been consumed as well.
                    $i += 2;
                }
            }

            $text .= self::uchr($code);
        }

        return $text;
    }
336
337
    /**
     * Returns the font space limit provided by the configuration object.
     *
     * @todo Deprecated, use $this->config->getFontSpaceLimit() instead.
     */
    protected function getFontSpaceLimit(): int
    {
        $limit = $this->config->getFontSpaceLimit();

        return $limit;
    }
344
345 19
    /**
     * Decode a list of text commands into a single string.
     *
     * Commands are appended to the current word. A numeric ('n') command whose value is
     * below the font space limit moves the write position to a new word slot; other
     * numeric commands are ignored. Hexadecimal ('<') commands are hex-decoded, every
     * other command is octal-decoded. Escaped characters are then unescaped, each word
     * is passed through decodeContent(), and the words are joined with a single space.
     *
     * @param array $commands commands indexed by PDFObject::TYPE and PDFObject::COMMAND
     */
    public function decodeText(array $commands): string
    {
        $limit = $this->getFontSpaceLimit();
        $slot = 0;
        $chunks = [];

        foreach ($commands as $command) {
            switch ($command[PDFObject::TYPE]) {
                case 'n':
                    if ((float) (trim($command[PDFObject::COMMAND])) < $limit) {
                        // Start a new word after everything collected so far.
                        $slot = \count($chunks);
                    }
                    continue 2;
                case '<':
                    // Decode hexadecimal.
                    $text = self::decodeHexadecimal('<'.$command[PDFObject::COMMAND].'>');
                    break;

                default:
                    // Decode octal (if necessary).
                    $text = self::decodeOctal($command[PDFObject::COMMAND]);
            }

            // Replace escaped chars.
            $text = str_replace(
                ['\\\\', '\(', '\)', '\n', '\r', '\t', '\f', '\ '],
                ['\\', '(', ')', "\n", "\r", "\t", "\f", ' '],
                $text
            );

            // Append to the current word, creating it on first use.
            $chunks[$slot] = ($chunks[$slot] ?? '').$text;
        }

        $decoded = [];
        foreach ($chunks as $chunk) {
            $decoded[] = $this->decodeContent($chunk);
        }

        return implode(' ', $decoded);
    }
389
390
    /**
     * Decode raw text with the first applicable strategy.
     *
     * A `ToUnicode` entry is used first. Otherwise the `Encoding` entry is tried, and
     * when it is absent or yields null the text goes through encoding autodetection.
     *
     * @param bool $unicode This parameter is deprecated and might be removed in a future release
     */
    public function decodeContent(string $text, ?bool &$unicode = null): string
    {
        if ($this->has('ToUnicode')) {
            return $this->decodeContentByToUnicodeCMap($text);
        }

        $decoded = $this->has('Encoding') ? $this->decodeContentByEncoding($text) : null;

        return $decoded ?? $this->decodeContentByAutodetectIfNecessary($text);
    }
409
410 18
    /**
     * Decode text by cutting it into fixed-size codes and translating each one.
     *
     * The code size is the `from` size of the translation table. A code that
     * translateChar() reports as false is looked up in the descendant fonts, if any;
     * when no descendant translates it the raw code is converted from Windows-1252,
     * and without descendant fonts self::MISSING is used.
     */
    private function decodeContentByToUnicodeCMap(string $text): string
    {
        $step = $this->tableSizes['from'];

        if (!$step) {
            return $text;
        }

        $decodedText = '';
        $total = \strlen($text);

        for ($offset = 0; $offset < $total; $offset += $step) {
            $chunk = substr($text, $offset, $step);
            $translated = $this->translateChar($chunk, false);

            if (false !== $translated) {
                $decodedText .= $translated;
                continue;
            }

            if (!$this->has('DescendantFonts')) {
                $decodedText .= self::MISSING;
                continue;
            }

            if ($this->get('DescendantFonts') instanceof PDFObject) {
                $fonts = $this->get('DescendantFonts')->getHeader()->getElements();
            } else {
                $fonts = $this->get('DescendantFonts')->getContent();
            }

            // Ask each descendant font in turn; the first translation wins.
            $fromDescendant = false;
            foreach ($fonts as $font) {
                if (!$font instanceof self) {
                    continue;
                }

                $fromDescendant = $font->translateChar($chunk, false);
                if (false !== $fromDescendant) {
                    $fromDescendant = mb_convert_encoding($fromDescendant, 'UTF-8', 'Windows-1252');
                    break;
                }
            }

            if (false !== $fromDescendant) {
                $decodedText .= $fromDescendant;
            } else {
                $decodedText .= mb_convert_encoding($chunk, 'UTF-8', 'Windows-1252');
            }
        }

        return $decodedText;
    }
457
458
    /**
     * Decode content according to the font's `Encoding` entry, whatever its type.
     *
     * @return string|null the decoded text, or null when the element-based decoder has no result
     *
     * @throws LogicException when the Encoding entry is neither an Encoding nor an Element
     */
    private function decodeContentByEncoding(string $text): ?string
    {
        $encoding = $this->get('Encoding');

        // Encoding referenced by object id (/Encoding 520 0 R) whose object does not carry
        // `/Type /Encoding` in its dictionary: build an Encoding instance from it.
        if ($encoding instanceof PDFObject) {
            $encoding = $this->getInitializedEncodingByPdfObject($encoding);
        }

        // Encoding available as an Encoding instance.
        if ($encoding instanceof Encoding) {
            return $this->decodeContentByEncodingEncoding($text, $encoding);
        }

        // Anything that is not an Element at this point is an unintended type.
        if (!$encoding instanceof Element) {
            $encodingClassName = \get_class($encoding);
            throw new LogicException("Unknown encoding instance type: {$encodingClassName}");
        }

        // Encoding given as a plain name (/Encoding /WinAnsiEncoding).
        //todo: ElementString class must be used?
        return $this->decodeContentByEncodingElement($text, $encoding);
    }
484
485 2
    /**
     * Returns the Encoding built from the given PDFObject, creating it on first use.
     *
     * The instance is stored on the font, so later calls return the stored instance
     * without looking at the argument again.
     */
    private function getInitializedEncodingByPdfObject(PDFObject $PDFObject): Encoding
    {
        if (null === $this->initializedEncodingByPdfObject) {
            $created = $this->createInitializedEncodingByPdfObject($PDFObject);
            $this->initializedEncodingByPdfObject = $created;
        }

        return $this->initializedEncodingByPdfObject;
    }
493
494
    /**
     * Decode content byte by byte when the Encoding entry is an Encoding instance.
     *
     * Each byte value is translated by the Encoding; when the translation is null the
     * byte value itself is used. The resulting code is converted with uchr().
     */
    private function decodeContentByEncodingEncoding(string $text, Encoding $encoding): string
    {
        $decoded = '';

        for ($pos = 0, $total = \strlen($text); $pos < $total; ++$pos) {
            $byteValue = \ord($text[$pos]);
            $mapped = $encoding->translateChar($byteValue);

            $decoded .= self::uchr($mapped ?? $byteValue);
        }

        return $decoded;
    }
510
511
    /**
     * Decode content when the Encoding entry is an Element holding an encoding name.
     *
     * @param string  $text     raw text to convert
     * @param Element $encoding element whose content is the PDF encoding name
     *
     * @return string|null the text converted to UTF-8, or null when the encoding name
     *                     is not supported or the conversion fails
     */
    private function decodeContentByEncodingElement(string $text, Element $encoding): ?string
    {
        $pdfEncodingName = $encoding->getContent();

        // mb_convert_encoding does not support MacRoman/macintosh,
        // so we use iconv() here
        $iconvEncodingName = $this->getIconvEncodingNameOrNullByPdfEncodingName($pdfEncodingName);

        if (!$iconvEncodingName) {
            return null;
        }

        $converted = iconv($iconvEncodingName, 'UTF-8', $text);

        // iconv() returns false on failure; report "no result" instead of letting the
        // false be coerced into an empty string by the return type.
        return false === $converted ? null : $converted;
    }
524
525
    /**
     * Map a PDF encoding name to the encoding name understood by iconv().
     *
     * @return string|null the iconv name, or null when the PDF name is not in the map
     */
    private function getIconvEncodingNameOrNullByPdfEncodingName(string $pdfEncodingName): ?string
    {
        $iconvNames = [
            'StandardEncoding' => 'ISO-8859-1',
            'MacRomanEncoding' => 'MACINTOSH',
            'WinAnsiEncoding' => 'CP1252',
        ];

        return $iconvNames[$pdfEncodingName] ?? null;
    }
540
541 8
    /**
     * Returns the text as-is when it is valid UTF-8, otherwise converts it to UTF-8
     * treating the input as Windows-1252.
     *
     * @param string $text raw text
     *
     * @return string UTF-8 text
     */
    private function decodeContentByAutodetectIfNecessary(string $text): string
    {
        if (mb_check_encoding($text, 'UTF-8')) {
            return $text;
        }

        //todo: Why exactly `Windows-1252` used?
        return mb_convert_encoding($text, 'UTF-8', 'Windows-1252');
    }
550
551
    /**
     * Create an Encoding instance from a PDFObject instance and call init() on it.
     */
    private function createInitializedEncodingByPdfObject(PDFObject $PDFObject): Encoding
    {
        $created = $this->createEncodingByPdfObject($PDFObject);
        $created->init();

        return $created;
    }
561
562
    /**
     * Create an Encoding instance from a PDFObject instance (without init).
     *
     * The new instance receives the document, header, content and config of the
     * given object.
     */
    private function createEncodingByPdfObject(PDFObject $PDFObject): Encoding
    {
        return new Encoding(
            $PDFObject->getDocument(),
            $PDFObject->getHeader(),
            $PDFObject->getContent(),
            $PDFObject->getConfig()
        );
    }
574
}
575