Passed
Pull Request — master (#500)
by
unknown
02:13
created

Font::decodeContentByToUnicodeCMap()   B

Complexity

Conditions 10
Paths 2

Size

Total Lines 46
Code Lines 28

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 12
CRAP Score 25.6155

Importance

Changes 0
Metric Value
cc 10
eloc 28
c 0
b 0
f 0
nc 2
nop 1
dl 0
loc 46
ccs 12
cts 26
cp 0.4615
crap 25.6155
rs 7.6666

How to fix   Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
/**
4
 * @file
5
 *          This file is part of the PdfParser library.
6
 *
7
 * @author  Sébastien MALOT <[email protected]>
8
 * @date    2017-01-03
9
 *
10
 * @license LGPLv3
11
 * @url     <https://github.com/smalot/pdfparser>
12
 *
13
 *  PdfParser is a pdf library written in PHP, extraction oriented.
14
 *  Copyright (C) 2017 - Sébastien MALOT <[email protected]>
15
 *
16
 *  This program is free software: you can redistribute it and/or modify
17
 *  it under the terms of the GNU Lesser General Public License as published by
18
 *  the Free Software Foundation, either version 3 of the License, or
19
 *  (at your option) any later version.
20
 *
21
 *  This program is distributed in the hope that it will be useful,
22
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
23
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
24
 *  GNU Lesser General Public License for more details.
25
 *
26
 *  You should have received a copy of the GNU Lesser General Public License
27
 *  along with this program.
28
 *  If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
29
 */
30
31
namespace Smalot\PdfParser;
32
33
use LogicException;
34
use Smalot\PdfParser\Encoding\WinAnsiEncoding;
35
use Smalot\PdfParser\Exception\EncodingNotFoundException;
36
37
/**
38
 * Class Font
39
 */
40
class Font extends PDFObject
41
{
42
    const MISSING = '?';
43
44
    /**
45
     * @var array
46
     */
47
    protected $table = null;
48
49
    /**
50
     * @var array
51
     */
52
    protected $tableSizes = null;
53
54
    /**
55
     * Caches results from uchr.
56
     *
57
     * @var array
58
     */
59
    private static $uchrCache = [];
60
61
    /**
62
     * In some pdf-files (@see https://github.com/smalot/pdfparser/pull/500) an encoding may be referenced by object id
     * while the object itself does not contain `/Type /Encoding` in its dictionary. Such objects are not initialized as
     * Encoding in \Smalot\PdfParser\PDFObject::factory() during file parsing (they are plain PDFObject instances).
     *
     * Therefore, we create an Encoding instance from them during decoding and cache it in this property.
67
     *
68
     * @var Encoding
69
     */
70
    private $initializedEncodingByPdfObject;
71
72 37
    /**
     * Initializes the font by building its character translation table.
     */
    public function init()
    {
        // Load translate table.
        $this->loadTranslateTable();
    }
77
78 2
    /**
     * Returns the `BaseFont` entry cast to string, or `[Unknown]` when the font has no such entry.
     */
    public function getName(): string
    {
        return $this->has('BaseFont') ? (string) $this->get('BaseFont') : '[Unknown]';
    }
82
83 2
    /**
     * Returns the `Subtype` entry of the font header cast to string.
     */
    public function getType(): string
    {
        return (string) $this->header->get('Subtype');
    }
87
88 1
    /**
     * Returns the font details: `Name`, `Type` and `Encoding`, completed with the parent's details.
     *
     * `Encoding` defaults to `Ansi` when the font has no `Encoding` entry. Because the parent's
     * details are merged with `+=`, the three keys set here win over same-named parent keys.
     *
     * @param bool $deep forwarded unchanged to parent::getDetails()
     */
    public function getDetails(bool $deep = true): array
    {
        $details = [];

        $details['Name'] = $this->getName();
        $details['Type'] = $this->getType();
        $details['Encoding'] = ($this->has('Encoding') ? (string) $this->get('Encoding') : 'Ansi');

        $details += parent::getDetails($deep);

        return $details;
    }
100
101
    /**
     * Translates a single character code through the font's translation table.
     *
     * When the code is not in the table:
     *  - with $use_default = true, self::MISSING is returned;
     *  - with $use_default = false, the fallback is returned: the input bytes unchanged, or, for
     *    a single-byte code whose `Encoding` entry is an Encoding instance resolving to
     *    WinAnsiEncoding, the character produced by self::uchr() for that code.
     *
     * @param string $char        raw bytes of one character code
     * @param bool   $use_default whether a missing code yields self::MISSING instead of the fallback
     *
     * @return string|bool the code paths visible in this method return the table entry or a string
     */
    public function translateChar(string $char, bool $use_default = true)
    {
        $dec = hexdec(bin2hex($char));

        if (\array_key_exists($dec, $this->table)) {
            return $this->table[$dec];
        }

        // fallback for decoding single-byte ANSI characters that are not in the lookup table
        $fallbackDecoded = $char;
        if (
            \strlen($char) < 2
            && $this->has('Encoding')
            && $this->get('Encoding') instanceof Encoding
        ) {
            try {
                if (WinAnsiEncoding::class === $this->get('Encoding')->__toString()) {
                    $fallbackDecoded = self::uchr($dec);
                }
            } catch (EncodingNotFoundException $e) {
                // Encoding->getEncodingClass() throws EncodingNotFoundException when BaseEncoding doesn't exists
                // See table 5.11 on PDF 1.5 specs for more info
            }
        }

        return $use_default ? self::MISSING : $fallbackDecoded;
    }
131
132 35
    /**
     * Converts a numeric code to its UTF-8 character, memoizing the result in self::$uchrCache.
     *
     * @param int $code numeric character code, rendered as the HTML entity `&#<code>;` before conversion
     */
    public static function uchr(int $code): string
    {
        if (!isset(self::$uchrCache[$code])) {
            // html_entity_decode() will not work with UTF-16 or UTF-32 char entities,
            // therefore, we use mb_convert_encoding() instead
            self::$uchrCache[$code] = mb_convert_encoding("&#{$code};", 'UTF-8', 'HTML-ENTITIES');
        }

        return self::$uchrCache[$code];
    }
142
143 37
    /**
     * Builds (once) and returns the translation table from the font's `ToUnicode` CMap.
     *
     * The table maps a source character code (int) to its decoded text. It is filled from the
     * `bfchar` and `bfrange` sections of the CMap; the `codespacerange` and `bfchar` sections also
     * determine $this->tableSizes, i.e. how many bytes make up one source / destination code.
     * When the font has no `ToUnicode` entry the table stays empty and both sizes are 1.
     *
     * @return array the translation table (the cached one on subsequent calls)
     */
    public function loadTranslateTable(): array
    {
        if (null !== $this->table) {
            return $this->table;
        }

        $this->table = [];
        $this->tableSizes = [
            'from' => 1,
            'to' => 1,
        ];

        if ($this->has('ToUnicode')) {
            $content = $this->get('ToUnicode')->getContent();
            $matches = [];

            // Support for multiple spacerange sections
            if (preg_match_all('/begincodespacerange(?P<sections>.*?)endcodespacerange/s', $content, $matches)) {
                // Only the first section is used to derive the code sizes.
                $section = $matches['sections'][0];
                $regexp = '/<(?P<from>[0-9A-F]+)> *<(?P<to>[0-9A-F]+)>[ \r\n]+/is';

                $ranges = [];
                preg_match_all($regexp, $section, $ranges);

                // Two hex digits per byte; an odd digit count is rounded up so the size is
                // always a whole number of bytes (it is later used as a substr() length).
                $this->tableSizes = [
                    'from' => (int) max(1, ceil(\strlen((string) current($ranges['from'])) / 2)),
                    'to' => (int) max(1, ceil(\strlen((string) current($ranges['to'])) / 2)),
                ];
            }

            // Support for multiple bfchar sections
            if (preg_match_all('/beginbfchar(?P<sections>.*?)endbfchar/s', $content, $matches)) {
                foreach ($matches['sections'] as $section) {
                    $regexp = '/<(?P<from>[0-9A-F]+)> +<(?P<to>[0-9A-F]+)>[ \r\n]+/is';

                    $pairs = [];
                    preg_match_all($regexp, $section, $pairs);

                    $this->tableSizes['from'] = (int) max(1, ceil(\strlen((string) current($pairs['from'])) / 2));

                    foreach ($pairs['from'] as $key => $from) {
                        // The destination may hold several 4-digit code units; decode each of them.
                        $parts = preg_split(
                            '/([0-9A-F]{4})/i',
                            $pairs['to'][$key],
                            0,
                            \PREG_SPLIT_NO_EMPTY | \PREG_SPLIT_DELIM_CAPTURE
                        );
                        $text = '';
                        foreach ($parts as $part) {
                            $text .= self::uchr((int) hexdec($part));
                        }
                        $this->table[hexdec($from)] = $text;
                    }
                }
            }

            // Support for multiple bfrange sections
            if (preg_match_all('/beginbfrange(?P<sections>.*?)endbfrange/s', $content, $matches)) {
                foreach ($matches['sections'] as $section) {
                    // Support for : <srcCode1> <srcCode2> <dstString>
                    $regexp = '/<(?P<from>[0-9A-F]+)> *<(?P<to>[0-9A-F]+)> *<(?P<offset>[0-9A-F]+)>[ \r\n]+/is';

                    $ranges = [];
                    preg_match_all($regexp, $section, $ranges);

                    foreach ($ranges['from'] as $key => $from) {
                        $char_from = hexdec($from);
                        $char_to = hexdec($ranges['to'][$key]);
                        $offset = hexdec($ranges['offset'][$key]);

                        for ($char = $char_from; $char <= $char_to; ++$char) {
                            $this->table[$char] = self::uchr($char - $char_from + $offset);
                        }
                    }

                    // Support for : <srcCode1> <srcCodeN> [<dstString1> <dstString2> ... <dstStringN>]
                    // Some PDF file has 2-byte Unicode values on new lines > added \r\n
                    $regexp = '/<(?P<from>[0-9A-F]+)> *<(?P<to>[0-9A-F]+)> *\[(?P<strings>[\r\n<>0-9A-F ]+)\][ \r\n]+/is';

                    $lists = [];
                    preg_match_all($regexp, $section, $lists);

                    foreach ($lists['from'] as $key => $from) {
                        $char_from = hexdec($from);
                        $strings = [];

                        preg_match_all('/<(?P<string>[0-9A-F]+)> */is', $lists['strings'][$key], $strings);

                        foreach ($strings['string'] as $position => $string) {
                            $parts = preg_split(
                                '/([0-9A-F]{4})/i',
                                $string,
                                0,
                                \PREG_SPLIT_NO_EMPTY | \PREG_SPLIT_DELIM_CAPTURE
                            );
                            $text = '';
                            foreach ($parts as $part) {
                                $text .= self::uchr((int) hexdec($part));
                            }
                            $this->table[$char_from + $position] = $text;
                        }
                    }
                }
            }
        }

        return $this->table;
    }
250
251 1
    /**
     * Replaces the translation table.
     *
     * @param array $table table indexed by numeric character code (the key translateChar() looks up)
     */
    public function setTable(array $table)
    {
        $this->table = $table;
    }
255
256 40
    /**
     * Replaces every `<...>` hexadecimal string in the input by the bytes it encodes.
     *
     * Input containing `<?xml` is returned untouched. Text outside `<...>` is kept as is.
     *
     * @param string $hexa       text that may contain hexadecimal strings such as `<48656C6C6F>`
     * @param bool   $add_braces when true, each decoded string is wrapped in parentheses and
     *                           its backslashes are doubled
     */
    public static function decodeHexadecimal(string $hexa, bool $add_braces = false): string
    {
        // Special shortcut for XML content.
        if (false !== stripos($hexa, '<?xml')) {
            return $hexa;
        }

        $text = '';
        $parts = preg_split('/(<[a-f0-9]+>)/si', $hexa, -1, \PREG_SPLIT_NO_EMPTY | \PREG_SPLIT_DELIM_CAPTURE);

        foreach ($parts as $part) {
            // No per-part `<?xml` check is needed: such input already returned above.
            if (!preg_match('/^<.*>$/s', $part)) {
                $text .= $part;
                continue;
            }

            // strip line breaks
            $part = preg_replace("/[\r\n]/", '', $part);
            $part = pack('H*', trim($part, '<>'));

            $text .= $add_braces
                ? '('.str_replace('\\', '\\\\', $part).')'
                : $part;
        }

        return $text;
    }
288
289 40
    /**
     * Replaces every backslash followed by exactly three octal digits by the byte it denotes.
     *
     * All other text is returned unchanged.
     *
     * @param string $text text that may contain escapes such as `\101`
     */
    public static function decodeOctal(string $text): string
    {
        $decoded = preg_replace_callback(
            '/\\\\([0-7]{3})/',
            static function (array $match): string {
                // octdec() is typed int|float; chr() needs an int.
                return \chr((int) octdec($match[1]));
            },
            $text
        );

        // preg_replace_callback() yields null on a PCRE error; keep the input in that case.
        return $decoded ?? $text;
    }
304
305 54
    /**
     * Replaces every `#xx` sequence, where `xx` is two hexadecimal digits, by the byte it denotes.
     *
     * The two digits are read as hexadecimal, so the pattern accepts A-F as well as 0-9
     * (e.g. `#2F` becomes `/`). All other text is returned unchanged.
     *
     * @param string $text text that may contain `#xx` sequences
     */
    public static function decodeEntities(string $text): string
    {
        $decoded = preg_replace_callback(
            '/#([0-9A-Fa-f]{2})/',
            static function (array $match): string {
                // hexdec() is typed int|float; chr() needs an int.
                return \chr((int) hexdec($match[1]));
            },
            $text
        );

        // preg_replace_callback() yields null on a PCRE error; keep the input in that case.
        return $decoded ?? $text;
    }
320
321 40
    /**
     * Decodes text that starts with the byte order marker 0xFE 0xFF.
     *
     * After the marker, the text is read two bytes at a time and each pair is converted with
     * self::uchr(). Text without the marker is returned unchanged.
     */
    public static function decodeUnicode(string $text): string
    {
        if ("\xFE\xFF" !== substr($text, 0, 2)) {
            return $text;
        }

        // Strip U+FEFF byte order marker.
        $decode = substr($text, 2);
        $text = '';
        $length = \strlen($decode);

        for ($i = 0; $i < $length; $i += 2) {
            $text .= self::uchr((int) hexdec(bin2hex(substr($decode, $i, 2))));
        }

        return $text;
    }
336
337
    /**
     * Returns the font space limit from the configuration.
     *
     * @deprecated use $this->config->getFontSpaceLimit() instead
     */
    protected function getFontSpaceLimit(): int
    {
        return $this->config->getFontSpaceLimit();
    }
344
345 20
    /**
     * Decodes a list of text commands into a string.
     *
     * Each command is read through its PDFObject::TYPE and PDFObject::COMMAND entries:
     *  - type `n` (number): when the value is below the font space limit a new word is started;
     *  - type `<`: the command is decoded as a hexadecimal string;
     *  - any other type: octal escapes in the command are decoded.
     * Escaped characters are then unescaped, the pieces are grouped into words, each word is
     * passed through decodeContent(), and the words are joined with a single space.
     *
     * @param array $commands commands indexed by PDFObject::TYPE / PDFObject::COMMAND
     */
    public function decodeText(array $commands): string
    {
        $word_position = 0;
        $words = [];
        $font_space = $this->getFontSpaceLimit();

        foreach ($commands as $command) {
            switch ($command[PDFObject::TYPE]) {
                case 'n':
                    if ((float) (trim($command[PDFObject::COMMAND])) < $font_space) {
                        $word_position = \count($words);
                    }
                    continue 2;
                case '<':
                    // Decode hexadecimal.
                    $text = self::decodeHexadecimal('<'.$command[PDFObject::COMMAND].'>');
                    break;

                default:
                    // Decode octal (if necessary).
                    $text = self::decodeOctal($command[PDFObject::COMMAND]);
            }

            // replace escaped chars
            $text = str_replace(
                ['\\\\', '\(', '\)', '\n', '\r', '\t', '\f', '\ '],
                ['\\', '(', ')', "\n", "\r", "\t", "\f", ' '],
                $text
            );

            // add content to result string
            if (isset($words[$word_position])) {
                $words[$word_position] .= $text;
            } else {
                $words[$word_position] = $text;
            }
        }

        foreach ($words as &$word) {
            $word = $this->decodeContent($word);
        }
        // Break the reference so later use of $word cannot overwrite the last element.
        unset($word);

        return implode(' ', $words);
    }
389
390
    /**
     * Decodes raw text using the first applicable strategy:
     * the `ToUnicode` CMap, then the `Encoding` entry, then encoding autodetection.
     *
     * The `Encoding` strategy is skipped when it yields null.
     *
     * @param string $text    raw text to decode
     * @param bool   $unicode This parameter is deprecated and might be removed in a future release
     *                        (it is neither read nor written here)
     */
    public function decodeContent(string $text, ?bool &$unicode = null): string
    {
        if ($this->has('ToUnicode')) {
            return $this->decodeContentByToUnicodeCMap($text);
        }

        if ($this->has('Encoding')) {
            $result = $this->decodeContentByEncoding($text);

            if (null !== $result) {
                return $result;
            }
        }

        return $this->decodeContentByAutodetectIfNecessary($text);
    }
409
410 18
    /**
     * Decodes text by translating each character code through the translation table.
     *
     * The text is cut into chunks of $this->tableSizes['from'] bytes and every chunk is passed to
     * translateChar(). Only when that call returns false are the fallbacks used: the fonts listed
     * in `DescendantFonts` (whose result is converted from Windows-1252 to UTF-8), then a plain
     * Windows-1252 conversion of the chunk, or self::MISSING when there are no descendant fonts.
     */
    private function decodeContentByToUnicodeCMap(string $text): string
    {
        $bytes = $this->tableSizes['from'];

        if (!$bytes) {
            return $text;
        }

        $result = '';
        $length = \strlen($text);

        for ($i = 0; $i < $length; $i += $bytes) {
            $char = substr($text, $i, $bytes);

            if (false !== ($decoded = $this->translateChar($char, false))) {
                $char = $decoded;
            } elseif ($this->has('DescendantFonts')) {
                if ($this->get('DescendantFonts') instanceof PDFObject) {
                    $fonts = $this->get('DescendantFonts')->getHeader()->getElements();
                } else {
                    $fonts = $this->get('DescendantFonts')->getContent();
                }
                $decoded = false;

                // The first descendant font able to translate the chunk wins.
                foreach ($fonts as $font) {
                    if ($font instanceof self) {
                        if (false !== ($decoded = $font->translateChar($char, false))) {
                            $decoded = mb_convert_encoding($decoded, 'UTF-8', 'Windows-1252');
                            break;
                        }
                    }
                }

                if (false !== $decoded) {
                    $char = $decoded;
                } else {
                    $char = mb_convert_encoding($char, 'UTF-8', 'Windows-1252');
                }
            } else {
                $char = self::MISSING;
            }

            $result .= $char;
        }

        return $result;
    }
457
458
    /**
     * Decode content by any type of Encoding (dictionary's item) instance.
     *
     * @return string|null the decoded text, or null when the delegated decoder returns null
     *
     * @throws LogicException when the `Encoding` entry is neither a PDFObject, an Encoding nor an Element
     */
    private function decodeContentByEncoding(string $text): ?string
    {
        $encoding = $this->get('Encoding');

        // When Encoding referenced by object id (/Encoding 520 0 R) but object itself does not contain `/Type /Encoding` in it's dictionary.
        if ($encoding instanceof PDFObject) {
            $encoding = $this->getInitializedEncodingByPdfObject($encoding);
        }

        // When Encoding referenced by object id (/Encoding 520 0 R) but object itself contains `/Type /Encoding` in it's dictionary.
        if ($encoding instanceof Encoding) {
            return $this->decodeContentByEncodingEncoding($text, $encoding);
        }

        // When Encoding is just string (/Encoding /WinAnsiEncoding)
        if ($encoding instanceof Element) { //todo: ElementString class must by used?
            return $this->decodeContentByEncodingElement($text, $encoding);
        }

        // Encoding has unintended type.
        $encodingClassName = \get_class($encoding);
        throw new LogicException("Unknown encoding instance type: {$encodingClassName}");
    }
484
485 3
    /**
     * Returns the Encoding built from the given PDFObject, creating and caching it on first use.
     *
     * Once an instance is cached it is returned for every later call, whatever object is passed.
     */
    private function getInitializedEncodingByPdfObject(PDFObject $PDFObject): Encoding
    {
        if (!$this->initializedEncodingByPdfObject) {
            $this->initializedEncodingByPdfObject = $this->createInitializedEncodingByPdfObject($PDFObject);
        }

        return $this->initializedEncodingByPdfObject;
    }
493
494
    /**
     * Decode content when Encoding is instance of Encoding.
     *
     * Each byte of the text is translated through $encoding->translateChar(); when that yields
     * null the byte value itself is used. The resulting code is converted with self::uchr().
     */
    private function decodeContentByEncodingEncoding(string $text, Encoding $encoding): string
    {
        $result = '';
        $length = \strlen($text);

        for ($i = 0; $i < $length; ++$i) {
            $dec_av = \ord($text[$i]);
            $dec_ap = $encoding->translateChar($dec_av);
            $result .= self::uchr($dec_ap ?? $dec_av);
        }

        return $result;
    }
510
511
    /**
     * Decode content when Encoding is instance of Element.
     *
     * @return string|null the text converted to UTF-8, or null when the encoding name is not
     *                     supported or the conversion fails
     */
    private function decodeContentByEncodingElement(string $text, Element $encoding): ?string
    {
        $pdfEncodingName = $encoding->getContent();

        // mb_convert_encoding does not support MacRoman/macintosh,
        // so we use iconv() here
        $iconvEncodingName = $this->getIconvEncodingNameOrNullByPdfEncodingName($pdfEncodingName);

        if (!$iconvEncodingName) {
            return null;
        }

        $converted = iconv($iconvEncodingName, 'UTF-8', $text);

        // iconv() returns false on failure; report that as "not decoded" rather than
        // letting the return type coerce it to an empty string.
        return false === $converted ? null : $converted;
    }
524
525
    /**
     * Convert PDF encoding name to iconv-known encoding name.
     *
     * @return string|null the iconv name, or null when the PDF encoding name is not in the map
     */
    private function getIconvEncodingNameOrNullByPdfEncodingName(string $pdfEncodingName): ?string
    {
        $pdfToIconvEncodingNameMap = [
            'StandardEncoding' => 'ISO-8859-1',
            'MacRomanEncoding' => 'MACINTOSH',
            'WinAnsiEncoding' => 'CP1252',
        ];

        return $pdfToIconvEncodingNameMap[$pdfEncodingName] ?? null;
    }
540
541 8
    /**
     * Returns the text unchanged when it is valid UTF-8; otherwise converts it from Windows-1252.
     */
    private function decodeContentByAutodetectIfNecessary(string $text): string
    {
        if (mb_check_encoding($text, 'UTF-8')) {
            return $text;
        }

        //todo: Why exactly `Windows-1252` used?
        return mb_convert_encoding($text, 'UTF-8', 'Windows-1252');
    }
550
551
    /**
     * Create Encoding instance by PDFObject instance and init it.
     */
    private function createInitializedEncodingByPdfObject(PDFObject $PDFObject): Encoding
    {
        $encoding = $this->createEncodingByPdfObject($PDFObject);
        $encoding->init();

        return $encoding;
    }
561
562
    /**
     * Create Encoding instance by PDFObject instance (without init).
     *
     * The new instance receives the document, header, content and config of the given object.
     */
    private function createEncodingByPdfObject(PDFObject $PDFObject): Encoding
    {
        $document = $PDFObject->getDocument();
        $header = $PDFObject->getHeader();
        $content = $PDFObject->getContent();
        $config = $PDFObject->getConfig();

        return new Encoding($document, $header, $content, $config);
    }
574
}
575