Total Complexity | 95 |
Total Lines | 667 |
Duplicated Lines | 0 % |
Changes | 15 | ||
Bugs | 2 | Features | 2 |
Complex classes like Font often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Font, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
41 | class Font extends PDFObject |
||
42 | { |
||
43 | public const MISSING = '?'; |
||
44 | |||
45 | /** |
||
46 | * @var array |
||
47 | */ |
||
48 | protected $table; |
||
49 | |||
50 | /** |
||
51 | * @var array |
||
52 | */ |
||
53 | protected $tableSizes; |
||
54 | |||
55 | /** |
||
56 | * Caches results from uchr. |
||
57 | * |
||
58 | * @var array |
||
59 | */ |
||
60 | private static $uchrCache = []; |
||
61 | |||
62 | /** |
||
63 | * In some PDF-files encoding could be referenced by object id but object itself does not contain |
||
64 | * `/Type /Encoding` in its dictionary. These objects wouldn't be initialized as Encoding in |
||
65 | * \Smalot\PdfParser\PDFObject::factory() during file parsing (they would be just PDFObject). |
||
66 | * |
||
67 | * Therefore, we create an instance of Encoding from them during decoding and cache this value in this property. |
||
68 | * |
||
69 | * @var Encoding |
||
70 | * |
||
71 | * @see https://github.com/smalot/pdfparser/pull/500 |
||
72 | */ |
||
73 | private $initializedEncodingByPdfObject; |
||
74 | |||
/**
 * Initialise the font by building its character translation table.
 */
public function init()
{
    $this->loadTranslateTable();
}
||
80 | |||
/**
 * Font name: the `BaseFont` entry cast to string, or "[Unknown]" when the entry is absent.
 */
public function getName(): string
{
    if (!$this->has('BaseFont')) {
        return '[Unknown]';
    }

    return (string) $this->get('BaseFont');
}
||
85 | |||
/**
 * Font type: the header's `Subtype` entry cast to string.
 */
public function getType(): string
{
    $subtype = $this->header->get('Subtype');

    return (string) $subtype;
}
||
90 | |||
/**
 * Describe the font as an associative array.
 *
 * The font-specific keys 'Name', 'Type' and 'Encoding' ('Ansi' when the font has no
 * Encoding entry) come first; keys returned by the parent implementation are added
 * without overriding them.
 *
 * @param bool $deep forwarded unchanged to the parent implementation
 */
public function getDetails(bool $deep = true): array
{
    $fontDetails = [
        'Name' => $this->getName(),
        'Type' => $this->getType(),
        'Encoding' => $this->has('Encoding') ? (string) $this->get('Encoding') : 'Ansi',
    ];

    // Array union keeps the left-hand value when a key exists on both sides.
    return $fontDetails + parent::getDetails($deep);
}
||
103 | |||
104 | /** |
||
105 | * @return string|bool |
||
106 | */ |
||
107 | public function translateChar(string $char, bool $use_default = true) |
||
133 | } |
||
134 | |||
135 | /** |
||
136 | * Convert unicode character code to "utf-8" encoded string. |
||
137 | * |
||
138 | * @param int|float $code Unicode character code. Will be casted to int internally! |
||
139 | */ |
||
140 | public static function uchr($code): string |
||
154 | } |
||
155 | |||
/**
 * Init internal chars translation table by ToUnicode CMap.
 *
 * The table maps integer source character codes to "utf-8" encoded strings. While parsing,
 * $this->tableSizes receives the byte width of the source ('from') and destination ('to')
 * codes; both default to 1.
 *
 * The table is built only once: later calls (or calls made after setTable()) return the
 * table that is already stored on the instance.
 *
 * @return array the translation table (empty when the font has no ToUnicode entry)
 */
public function loadTranslateTable(): array
{
    if (null !== $this->table) {
        return $this->table;
    }

    $this->table = [];
    $this->tableSizes = [
        'from' => 1,
        'to' => 1,
    ];

    if (!$this->has('ToUnicode')) {
        return $this->table;
    }

    $content = $this->get('ToUnicode')->getContent();

    $this->loadCodeSpaceRangeSizes($content);
    $this->loadBfCharSections($content);
    $this->loadBfRangeSections($content);

    return $this->table;
}

/**
 * Set $this->tableSizes from the first pair found in the first codespacerange section.
 *
 * @param string $content raw content of the ToUnicode CMap
 */
private function loadCodeSpaceRangeSizes($content): void
{
    $sections = [];
    if (!preg_match_all('/begincodespacerange(?P<sections>.*?)endcodespacerange/s', $content, $sections)) {
        return;
    }

    // Only the first section is taken into account.
    $pairs = [];
    preg_match_all(
        '/<(?P<from>[0-9A-F]+)> *<(?P<to>[0-9A-F]+)>[ \r\n]+/is',
        $sections['sections'][0],
        $pairs
    );

    // Two hex digits describe one byte; fall back to a width of 1 when nothing matched.
    $this->tableSizes = [
        'from' => max(1, \strlen($pairs['from'][0] ?? '') / 2),
        'to' => max(1, \strlen($pairs['to'][0] ?? '') / 2),
    ];
}

/**
 * Add every "<src> <dst>" pair of every bfchar section to the translation table.
 *
 * @param string $content raw content of the ToUnicode CMap
 */
private function loadBfCharSections($content): void
{
    $sections = [];
    if (!preg_match_all('/beginbfchar(?P<sections>.*?)endbfchar/s', $content, $sections)) {
        return;
    }

    foreach ($sections['sections'] as $section) {
        $pairs = [];
        preg_match_all('/<(?P<from>[0-9A-F]+)> *<(?P<to>[0-9A-F]+)>[ \r\n]+/is', $section, $pairs);

        // The source code width is re-evaluated for each section, from its first pair.
        $this->tableSizes['from'] = max(1, \strlen($pairs['from'][0] ?? '') / 2);

        foreach ($pairs['from'] as $key => $from) {
            $text = self::decodeHexCodeUnits($pairs['to'][$key]);
            if (null === $text) {
                // The destination could not be split; skip the entry like the bfrange path does.
                continue;
            }

            $this->table[hexdec($from)] = $text;
        }
    }
}

/**
 * Add every range of every bfrange section to the translation table.
 *
 * Two destination forms are supported:
 *  - <srcCode1> <srcCode2> <dstString>: consecutive codes starting at the destination offset;
 *  - <srcCode1> <srcCodeN> [<dstString1> <dstString2> ... <dstStringN>]: one string per code.
 *
 * @param string $content raw content of the ToUnicode CMap
 */
private function loadBfRangeSections($content): void
{
    $sections = [];
    if (!preg_match_all('/beginbfrange(?P<sections>.*?)endbfrange/s', $content, $sections)) {
        return;
    }

    /*
     * Regexp to capture <from>, <to>, and either <offset> or [...] items.
     *  - (?P<from>...) Source range's start
     *  - (?P<to>...)   Source range's end
     *  - (?P<dest>...) Destination range's offset or each char code
     * Some PDF files have 2-byte Unicode values on new lines, hence \r\n in the character class.
     */
    $regexp = '/<(?P<from>[0-9A-F]+)> *<(?P<to>[0-9A-F]+)> *(?P<dest><[0-9A-F]+>|\[[\r\n<>0-9A-F ]+\])[ \r\n]+/is';

    foreach ($sections['sections'] as $section) {
        $ranges = [];
        preg_match_all($regexp, $section, $ranges);

        foreach ($ranges['from'] as $key => $from) {
            $char_from = hexdec($from);
            $char_to = hexdec($ranges['to'][$key]);
            $dest = $ranges['dest'][$key];

            $offset_matches = [];
            if (1 === preg_match('/^<(?P<offset>[0-9A-F]+)>$/i', $dest, $offset_matches)) {
                // Support for : <srcCode1> <srcCode2> <dstString>
                $offset = hexdec($offset_matches['offset']);

                for ($char = $char_from; $char <= $char_to; ++$char) {
                    $this->table[$char] = self::uchr($char - $char_from + $offset);
                }

                continue;
            }

            // Support for : <srcCode1> <srcCodeN> [<dstString1> <dstString2> ... <dstStringN>]
            $strings = [];
            $matched = preg_match_all('/<(?P<string>[0-9A-F]+)> */is', $dest, $strings);
            if (false === $matched || 0 === $matched) {
                continue;
            }

            foreach ($strings['string'] as $position => $string) {
                $text = self::decodeHexCodeUnits($string);
                if (null === $text) {
                    continue;
                }

                $this->table[$char_from + $position] = $text;
            }
        }
    }
}

/**
 * Convert a hex string into "utf-8" text, one uchr() call per group of four hex digits.
 *
 * @return string|null the decoded text, or null when the string could not be split
 */
private static function decodeHexCodeUnits(string $hex): ?string
{
    $parts = preg_split(
        '/([0-9A-F]{4})/i',
        $hex,
        0,
        \PREG_SPLIT_NO_EMPTY | \PREG_SPLIT_DELIM_CAPTURE
    );

    if (false === $parts) {
        return null;
    }

    $text = '';
    foreach ($parts as $part) {
        $text .= self::uchr(hexdec($part));
    }

    return $text;
}
||
274 | |||
/**
 * Replace the char translation table with a custom one.
 *
 * Expected layout of $table:
 *  - key:   integer character code;
 *  - value: "utf-8" encoded value.
 *
 * @return void
 */
public function setTable(array $table)
{
    $this->table = $table;
}
||
286 | |||
/**
 * Calculate text width with data from header 'Widths'.
 *
 * Each character of $text is looked up in the translation table to find its char code.
 * The code is then mapped to a position in 'Widths' through the 'FirstChar'..'LastChar'
 * range, and the width found at that position is added to the total.
 *
 * @param string     $text    text to measure
 * @param array|null $missing filled (by reference) with every character whose width was not found;
 *                            always reset to an array by this method
 *
 * @return float|null the summed width, or null when no character had a known width
 */
public function calculateTextWidth(string $text, ?array &$missing = null): ?float
{
    // loadTranslateTable() hands back the stored table, building it first when it was never loaded,
    // so array_flip() never receives null.
    $index_map = array_flip($this->loadTranslateTable());
    $details = $this->getDetails();

    // Usually, Widths key is set in $details array, but if it isn't use an empty array instead.
    $widths = $details['Widths'] ?? [];

    /*
     * Widths array is zero indexed but table is not. We must map them based on FirstChar and LastChar.
     *
     * FirstChar or LastChar can be null or missing altogether; both cases are treated as 0.
     */
    $first_char = (int) ($details['FirstChar'] ?? 0);
    $last_char = (int) ($details['LastChar'] ?? 0);
    $width_map = array_flip(range($first_char, $last_char));

    $width = null;
    $missing = [];
    $textLength = mb_strlen($text);
    for ($i = 0; $i < $textLength; ++$i) {
        $char = mb_substr($text, $i, 1);
        if (
            !\array_key_exists($char, $index_map)
            || !\array_key_exists($index_map[$char], $width_map)
            || !\array_key_exists($width_map[$index_map[$char]], $widths)
        ) {
            $missing[] = $char;
            continue;
        }

        $width_index = $width_map[$index_map[$char]];
        $width += $widths[$width_index];
    }

    return $width;
}
||
325 | |||
/**
 * Decode every `<...>` hexadecimal chunk found in $hexa into its binary string.
 *
 * Text outside of hexadecimal chunks is copied as is. Input containing "<?xml" is returned
 * untouched. When $add_braces is true each decoded chunk is wrapped in parentheses and its
 * backslashes are doubled.
 */
public static function decodeHexadecimal(string $hexa, bool $add_braces = false): string
{
    // Special shortcut for XML content.
    if (false !== stripos($hexa, '<?xml')) {
        return $hexa;
    }

    $chunks = preg_split('/(<[a-f0-9\s]+>)/si', $hexa, -1, \PREG_SPLIT_NO_EMPTY | \PREG_SPLIT_DELIM_CAPTURE);
    $decoded = '';

    foreach ($chunks as $chunk) {
        if (!preg_match('/^<[a-f0-9\s]+>$/si', $chunk)) {
            // Plain text between hexadecimal chunks.
            $decoded .= $chunk;
            continue;
        }

        // Drop whitespace and the angle brackets, then turn the hex digits into bytes.
        $digits = trim(preg_replace("/\s/", '', $chunk), '<>');
        $binary = pack('H*', $digits);

        if ($add_braces) {
            $decoded .= '('.preg_replace('/\\\/s', '\\\\\\', $binary).')';
        } else {
            $decoded .= $binary;
        }
    }

    return $decoded;
}
||
361 | |||
362 | /** |
||
363 | * Decode string with octal-decoded chunks. |
||
364 | */ |
||
365 | public static function decodeOctal(string $text): string |
||
381 | } |
||
382 | |||
/**
 * Replace every `#xx` sequence (xx being two hexadecimal digits) by the byte with that value.
 */
public static function decodeEntities(string $text): string
{
    $toByte = static function (array $hit): string {
        return \chr(hexdec($hit[1]));
    };

    return preg_replace_callback('/#([0-9a-f]{2})/i', $toByte, $text);
}
||
392 | |||
/**
 * Check if given string is Unicode text (by the UTF-16BE byte order mark "\xFE\xFF").
 * If true - decode to "utf-8" encoded string.
 * Otherwise - return text as is.
 *
 * The payload is read in 2-byte code units. A high surrogate immediately followed by a low
 * surrogate is combined into a single code point before conversion, so characters outside
 * the Basic Multilingual Plane are decoded as one character instead of two invalid halves.
 *
 * @todo Rename in next major release to make the name correspond to reality (for ex. decodeIfUnicode())
 */
public static function decodeUnicode(string $text): string
{
    if ("\xFE\xFF" !== substr($text, 0, 2)) {
        return $text;
    }

    // Strip U+FEFF byte order marker.
    $payload = substr($text, 2);
    $length = \strlen($payload);
    $decoded = '';

    for ($i = 0; $i < $length; $i += 2) {
        $unit = hexdec(bin2hex(substr($payload, $i, 2)));

        // High surrogate with a complete 2-byte unit after it: try to form a surrogate pair.
        if ($unit >= 0xD800 && $unit <= 0xDBFF && $i + 3 < $length) {
            $low = hexdec(bin2hex(substr($payload, $i + 2, 2)));

            if ($low >= 0xDC00 && $low <= 0xDFFF) {
                $unit = 0x10000 + (($unit - 0xD800) << 10) + ($low - 0xDC00);
                // The low surrogate has been consumed as well.
                $i += 2;
            }
        }

        $decoded .= self::uchr($unit);
    }

    return $decoded;
}
||
415 | |||
416 | /** |
||
417 | * @todo Deprecated, use $this->config->getFontSpaceLimit() instead. |
||
418 | */ |
||
419 | protected function getFontSpaceLimit(): int |
||
422 | } |
||
423 | |||
/**
 * Decode text by commands array.
 *
 * Each command is read through its PDFObject::TYPE and PDFObject::COMMAND entries:
 *  - type 'n': numeric offset; when it is below the font space limit (scaled by
 *    |$fontFactor| / 4) the text that follows starts a new word;
 *  - type '<': hexadecimal string, decoded with decodeHexadecimal();
 *  - any other type: string passed through decodeOctal().
 *
 * Collected words are decoded with decodeContent(), blank inner words are dropped and the
 * remaining words are joined with single spaces.
 *
 * @param array $commands   list of commands
 * @param float $fontFactor factor applied to the font space limit (its absolute value is used)
 */
public function decodeText(array $commands, float $fontFactor = 4): string
{
    $word_position = 0;
    $words = [];
    $font_space = $this->getFontSpaceLimit() * abs($fontFactor) / 4;

    foreach ($commands as $command) {
        switch ($command[PDFObject::TYPE]) {
            case 'n':
                $offset = (float) trim($command[PDFObject::COMMAND]);
                if ($offset - (float) $font_space < 0) {
                    $word_position = \count($words);
                }
                continue 2;
            case '<':
                // Decode hexadecimal.
                $text = self::decodeHexadecimal('<'.$command[PDFObject::COMMAND].'>');
                break;

            default:
                // Decode octal (if necessary).
                $text = self::decodeOctal($command[PDFObject::COMMAND]);
        }

        // replace escaped chars
        $text = str_replace(
            ['\\\\', '\(', '\)', '\n', '\r', '\t', '\f', '\ ', '\b'],
            [\chr(92), \chr(40), \chr(41), \chr(10), \chr(13), \chr(9), \chr(12), \chr(32), \chr(8)],
            $text
        );

        // add content to result string
        $words[$word_position] = ($words[$word_position] ?? '').$text;
    }

    // Assign by key instead of iterating by reference, so no reference to the last
    // element outlives the loop.
    foreach ($words as $index => $word) {
        $words[$index] = str_replace("\t", ' ', $this->decodeContent($word));
    }

    // Remove internal "words" that are just spaces, but leave them
    // if they are at either end of the array of words. This fixes,
    // for example, lines that are justified to fill
    // a whole row.
    for ($x = \count($words) - 2; $x >= 1; --$x) {
        if ('' === trim($words[$x], ' ')) {
            unset($words[$x]);
        }
    }
    $words = array_values($words);

    // Cut down on the number of unnecessary internal spaces by
    // imploding the string on the null byte, and checking if the
    // text includes extra spaces on either side. If so, merge
    // where appropriate.
    $line = implode("\x00\x00", $words);

    return str_replace(
        [" \x00\x00 ", "\x00\x00 ", " \x00\x00", "\x00\x00"],
        [' ', ' ', ' ', ' '],
        $line
    );
}
||
495 | |||
/**
 * Decode given $text to "utf-8" encoded string.
 *
 * Strategies are tried in this order:
 *  1. text starting with the UTF-16BE byte order mark is decoded as Unicode;
 *  2. a font with a ToUnicode entry decodes through its CMap (or DescendantFonts);
 *  3. a font with an Encoding entry decodes through it, unless that yields null;
 *  4. otherwise the encoding is auto-detected.
 *
 * @param bool $unicode This parameter is deprecated and might be removed in a future release
 */
public function decodeContent(string $text, ?bool &$unicode = null): string
{
    // UTF-16BE BOM present: decode directly as Unicode.
    if (0 === strncmp($text, "\xFE\xFF", 2)) {
        return $this->decodeUnicode($text);
    }

    if ($this->has('ToUnicode')) {
        return $this->decodeContentByToUnicodeCMapOrDescendantFonts($text);
    }

    $byEncoding = $this->has('Encoding') ? $this->decodeContentByEncoding($text) : null;
    if (null !== $byEncoding) {
        return $byEncoding;
    }

    return $this->decodeContentByAutodetectIfNecessary($text);
}
||
523 | |||
/**
 * First try to decode $text by ToUnicode CMap.
 * If char translation not found in ToUnicode CMap tries:
 *  - If DescendantFonts exists tries to decode char by one of that fonts.
 *      - If have no success to decode by DescendantFonts interpret $text as a string with "Windows-1252" encoding.
 *  - If DescendantFonts does not exist just return "?" as decoded char.
 *
 * $text is consumed in chunks of $this->tableSizes['from'] bytes; when that size is falsy
 * the text is returned untouched. The DescendantFonts list is looked up at most once per
 * call, the first time a chunk is not found in the CMap.
 *
 * @todo Seems this is invalid algorithm that do not follow pdf-format specification. Must be rewritten.
 */
private function decodeContentByToUnicodeCMapOrDescendantFonts(string $text): string
{
    $bytes = $this->tableSizes['from'];

    if (!$bytes) {
        return $text;
    }

    $result = '';
    $length = \strlen($text);

    // Resolved lazily on the first CMap miss, then reused for the rest of the text.
    $fontsResolved = false;
    $hasDescendantFonts = false;
    $fonts = [];

    for ($i = 0; $i < $length; $i += $bytes) {
        $char = substr($text, $i, $bytes);

        $decoded = $this->translateChar($char, false);
        if (false !== $decoded) {
            $result .= $decoded;
            continue;
        }

        if (!$fontsResolved) {
            $hasDescendantFonts = $this->has('DescendantFonts');
            if ($hasDescendantFonts) {
                $descendant = $this->get('DescendantFonts');
                $fonts = $descendant instanceof PDFObject
                    ? $descendant->getHeader()->getElements()
                    : $descendant->getContent();
            }
            $fontsResolved = true;
        }

        if (!$hasDescendantFonts) {
            $result .= self::MISSING;
            continue;
        }

        $decoded = false;
        foreach ($fonts as $font) {
            if (!$font instanceof self) {
                continue;
            }

            $decoded = $font->translateChar($char, false);
            if (false !== $decoded) {
                $decoded = mb_convert_encoding($decoded, 'UTF-8', 'Windows-1252');
                break;
            }
        }

        // No descendant font knew the char: interpret the raw bytes as Windows-1252.
        $result .= false !== $decoded
            ? $decoded
            : mb_convert_encoding($char, 'UTF-8', 'Windows-1252');
    }

    return $result;
}
||
580 | |||
/**
 * Decode content according to whatever the font's `Encoding` entry resolves to.
 *
 * @return string|null decoded text; null is passed through from decodeContentByEncodingElement()
 */
private function decodeContentByEncoding(string $text): ?string
{
    $source = $this->get('Encoding');

    // Encoding referenced by object id (/Encoding 520 0 R) whose object lacks `/Type /Encoding`
    // in its dictionary: build (or reuse) an Encoding instance from it.
    if ($source instanceof PDFObject) {
        $source = $this->getInitializedEncodingByPdfObject($source);
    }

    // Encoding available as an Encoding instance.
    if ($source instanceof Encoding) {
        return $this->decodeContentByEncodingEncoding($text, $source);
    }

    // Encoding given as a plain name (/Encoding /WinAnsiEncoding).
    if ($source instanceof Element) { // todo: ElementString class must by used?
        return $this->decodeContentByEncodingElement($text, $source);
    }

    // Anything else: keep valid UTF-8 as is (no double encoding), convert the rest.
    return mb_check_encoding($text, 'UTF-8')
        ? $text
        : mb_convert_encoding($text, 'UTF-8', 'Windows-1252');
}
||
610 | |||
/**
 * Return the Encoding instance cached on this font, creating and caching it from
 * $PDFObject on first use.
 */
private function getInitializedEncodingByPdfObject(PDFObject $PDFObject): Encoding
{
    $cached = $this->initializedEncodingByPdfObject;
    if ($cached) {
        return $cached;
    }

    $created = $this->createInitializedEncodingByPdfObject($PDFObject);
    $this->initializedEncodingByPdfObject = $created;

    return $created;
}
||
622 | |||
/**
 * Decode content when $encoding (given by $this->get('Encoding')) is instance of Encoding.
 *
 * Every byte of $text is translated through $encoding; when the translation is null the
 * byte value itself is used. The resulting code is converted with uchr().
 */
private function decodeContentByEncodingEncoding(string $text, Encoding $encoding): string
{
    $decoded = '';

    for ($pos = 0, $size = \strlen($text); $pos < $size; ++$pos) {
        $byte = \ord($text[$pos]);
        $mapped = $encoding->translateChar($byte);

        $decoded .= self::uchr(null === $mapped ? $byte : $mapped);
    }

    return $decoded;
}
||
639 | |||
/**
 * Decode content when $encoding (given by $this->get('Encoding')) is instance of Element.
 *
 * @return string|null the converted text, or null when the PDF encoding name has no iconv
 *                     counterpart or when iconv() fails to convert the text
 */
private function decodeContentByEncodingElement(string $text, Element $encoding): ?string
{
    $pdfEncodingName = $encoding->getContent();

    // mb_convert_encoding does not support MacRoman/macintosh,
    // so we use iconv() here
    $iconvEncodingName = $this->getIconvEncodingNameOrNullByPdfEncodingName($pdfEncodingName);

    if (!$iconvEncodingName) {
        return null;
    }

    $converted = iconv($iconvEncodingName, 'UTF-8//TRANSLIT//IGNORE', $text);

    // iconv() returns false on failure; report that as "not decoded" instead of
    // letting false leave a method declared to return ?string.
    return false === $converted ? null : $converted;
}
||
653 | |||
/**
 * Translate a PDF encoding name into the encoding name passed to iconv().
 *
 * @return string|null the iconv name, or null when the PDF name is not in the map
 */
private function getIconvEncodingNameOrNullByPdfEncodingName(string $pdfEncodingName): ?string
{
    $iconvNameByPdfName = [
        'StandardEncoding' => 'ISO-8859-1',
        'MacRomanEncoding' => 'MACINTOSH',
        'WinAnsiEncoding' => 'CP1252',
    ];

    return $iconvNameByPdfName[$pdfEncodingName] ?? null;
}
||
669 | |||
/**
 * Return $text unchanged when it is valid "utf-8"; otherwise interpret it as
 * "Windows-1252" and convert it to "utf-8".
 *
 * @return string|false
 */
private function decodeContentByAutodetectIfNecessary(string $text)
{
    // todo: Why exactly `Windows-1252` used?
    return mb_check_encoding($text, 'UTF-8')
        ? $text
        : mb_convert_encoding($text, 'UTF-8', 'Windows-1252');
}
||
685 | |||
/**
 * Build an Encoding instance from $PDFObject and call init() on it before returning it.
 */
private function createInitializedEncodingByPdfObject(PDFObject $PDFObject): Encoding
{
    $instance = $this->createEncodingByPdfObject($PDFObject);
    $instance->init();

    return $instance;
}
||
696 | |||
697 | /** |
||
698 | * Create Encoding instance by PDFObject instance (without init). |
||
699 | */ |
||
700 | private function createEncodingByPdfObject(PDFObject $PDFObject): Encoding |
||
708 | } |
||
709 | } |
||
710 |