Passed
Push — develop ( 30cf64...589229 )
by Guillaume
06:18 queued 04:10
created

ASCII::to_transliterate()   F

Complexity

Conditions 31
Paths 2569

Size

Total Lines 203
Code Lines 78

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 31
eloc 78
nc 2569
nop 3
dl 0
loc 203
rs 0
c 0
b 0
f 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * @psalm-immutable
9
 */
10
final class ASCII
11
{
12
    //
13
    // INFO: https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes
14
    //
15
16
    const UZBEK_LANGUAGE_CODE = 'uz';
17
18
    const TURKMEN_LANGUAGE_CODE = 'tk';
19
20
    const THAI_LANGUAGE_CODE = 'th';
21
22
    const PASHTO_LANGUAGE_CODE = 'ps';
23
24
    const ORIYA_LANGUAGE_CODE = 'or';
25
26
    const MONGOLIAN_LANGUAGE_CODE = 'mn';
27
28
    const KOREAN_LANGUAGE_CODE = 'ko';
29
30
    const KIRGHIZ_LANGUAGE_CODE = 'ky';
31
32
    const ARMENIAN_LANGUAGE_CODE = 'hy';
33
34
    const BENGALI_LANGUAGE_CODE = 'bn';
35
36
    const BELARUSIAN_LANGUAGE_CODE = 'be';
37
38
    const AMHARIC_LANGUAGE_CODE = 'am';
39
40
    const JAPANESE_LANGUAGE_CODE = 'ja';
41
42
    const CHINESE_LANGUAGE_CODE = 'zh';
43
44
    const DUTCH_LANGUAGE_CODE = 'nl';
45
46
    const ITALIAN_LANGUAGE_CODE = 'it';
47
48
    const MACEDONIAN_LANGUAGE_CODE = 'mk';
49
50
    const PORTUGUESE_LANGUAGE_CODE = 'pt';
51
52
    const GREEKLISH_LANGUAGE_CODE = 'el__greeklish';
53
54
    const GREEK_LANGUAGE_CODE = 'el';
55
56
    const HINDI_LANGUAGE_CODE = 'hi';
57
58
    const SWEDISH_LANGUAGE_CODE = 'sv';
59
60
    const TURKISH_LANGUAGE_CODE = 'tr';
61
62
    const BULGARIAN_LANGUAGE_CODE = 'bg';
63
64
    const HUNGARIAN_LANGUAGE_CODE = 'hu';
65
66
    const MYANMAR_LANGUAGE_CODE = 'my';
67
68
    const CROATIAN_LANGUAGE_CODE = 'hr';
69
70
    const FINNISH_LANGUAGE_CODE = 'fi';
71
72
    const GEORGIAN_LANGUAGE_CODE = 'ka';
73
74
    const RUSSIAN_LANGUAGE_CODE = 'ru';
75
76
    const RUSSIAN_PASSPORT_2013_LANGUAGE_CODE = 'ru__passport_2013';
77
78
    const RUSSIAN_GOST_2000_B_LANGUAGE_CODE = 'ru__gost_2000_b';
79
80
    const UKRAINIAN_LANGUAGE_CODE = 'uk';
81
82
    const KAZAKH_LANGUAGE_CODE = 'kk';
83
84
    const CZECH_LANGUAGE_CODE = 'cs';
85
86
    const DANISH_LANGUAGE_CODE = 'da';
87
88
    const POLISH_LANGUAGE_CODE = 'pl';
89
90
    const ROMANIAN_LANGUAGE_CODE = 'ro';
91
92
    const ESPERANTO_LANGUAGE_CODE = 'eo';
93
94
    const ESTONIAN_LANGUAGE_CODE = 'et';
95
96
    const LATVIAN_LANGUAGE_CODE = 'lv';
97
98
    const LITHUANIAN_LANGUAGE_CODE = 'lt';
99
100
    const NORWEGIAN_LANGUAGE_CODE = 'no';
101
102
    const VIETNAMESE_LANGUAGE_CODE = 'vi';
103
104
    const ARABIC_LANGUAGE_CODE = 'ar';
105
106
    const PERSIAN_LANGUAGE_CODE = 'fa';
107
108
    const SERBIAN_LANGUAGE_CODE = 'sr';
109
110
    const SERBIAN_CYRILLIC_LANGUAGE_CODE = 'sr__cyr';
111
112
    const SERBIAN_LATIN_LANGUAGE_CODE = 'sr__lat';
113
114
    const AZERBAIJANI_LANGUAGE_CODE = 'az';
115
116
    const SLOVAK_LANGUAGE_CODE = 'sk';
117
118
    const FRENCH_LANGUAGE_CODE = 'fr';
119
120
    const FRENCH_AUSTRIAN_LANGUAGE_CODE = 'fr_at';
121
122
    const FRENCH_SWITZERLAND_LANGUAGE_CODE = 'fr_ch';
123
124
    const GERMAN_LANGUAGE_CODE = 'de';
125
126
    const GERMAN_AUSTRIAN_LANGUAGE_CODE = 'de_at';
127
128
    const GERMAN_SWITZERLAND_LANGUAGE_CODE = 'de_ch';
129
130
    const ENGLISH_LANGUAGE_CODE = 'en';
131
132
    const EXTRA_LATIN_CHARS_LANGUAGE_CODE = 'latin';
133
134
    const EXTRA_WHITESPACE_CHARS_LANGUAGE_CODE = ' ';
135
136
    const EXTRA_MSWORD_CHARS_LANGUAGE_CODE = 'msword';
137
138
    /**
139
     * @var array<string, array<string, string>>|null
140
     */
141
    private static $ASCII_MAPS;
142
143
    /**
144
     * @var array<string, array<string, string>>|null
145
     */
146
    private static $ASCII_MAPS_AND_EXTRAS;
147
148
    /**
149
     * @var array<string, array<string, string>>|null
150
     */
151
    private static $ASCII_EXTRAS;
152
153
    /**
154
     * @var array<string, int>|null
155
     */
156
    private static $ORD;
157
158
    /**
159
     * @var array<string, int>|null
160
     */
161
    private static $LANGUAGE_MAX_KEY;
162
163
    /**
164
     * url: https://en.wikipedia.org/wiki/Wikipedia:ASCII#ASCII_printable_characters
165
     *
166
     * @var string
167
     */
168
    private static $REGEX_ASCII = "[^\x09\x10\x13\x0A\x0D\x20-\x7E]";
169
170
    /**
171
     * bidirectional text chars
172
     *
173
     * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
174
     *
175
     * @var array<int, string>
176
     */
177
    private static $BIDI_UNI_CODE_CONTROLS_TABLE = [
178
        // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
179
        8234 => "\xE2\x80\xAA",
180
        // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
181
        8235 => "\xE2\x80\xAB",
182
        // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
183
        8236 => "\xE2\x80\xAC",
184
        // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
185
        8237 => "\xE2\x80\xAD",
186
        // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
187
        8238 => "\xE2\x80\xAE",
188
        // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
189
        8294 => "\xE2\x81\xA6",
190
        // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
191
        8295 => "\xE2\x81\xA7",
192
        // FIRST STRONG ISOLATE // (use -> dir = "auto")
193
        8296 => "\xE2\x81\xA8",
194
        // POP DIRECTIONAL ISOLATE
195
        8297 => "\xE2\x81\xA9",
196
    ];
197
198
    /**
     * Get all languages from the constants "ASCII::.*LANGUAGE_CODE".
     *
     * The result is keyed by the lower-cased constant name: constants whose name
     * contains "EXTRA" keep their full name, for all other constants the
     * "_LANGUAGE_CODE" part is removed first (e.g. "GERMAN_LANGUAGE_CODE" => "german").
     * The value is the language code stored in the constant.
     *
     * The list is built once via reflection and afterwards served from a static cache.
     *
     * @return string[]
     *
     * @psalm-return array<string, string>
     */
    public static function getAllLanguages(): array
    {
        // init
        static $LANGUAGES = [];

        // already built on a previous call
        if ($LANGUAGES !== []) {
            return $LANGUAGES;
        }

        foreach ((new \ReflectionClass(__CLASS__))->getConstants() as $constant => $lang) {
            $name = \strpos($constant, 'EXTRA') !== false
                ? $constant
                : \str_replace('_LANGUAGE_CODE', '', $constant);

            $LANGUAGES[\strtolower($name)] = $lang;
        }

        return $LANGUAGES;
    }
224
225
    /**
     * Returns a replacement array for ASCII methods, grouped by language.
     *
     * EXAMPLE: <code>
     * $array = ASCII::charsArray();
     * var_dump($array['ru']['б']); // 'b'
     * </code>
     *
     * @psalm-suppress InvalidNullableReturnType - we use the prepare* methods here, so we don't get NULL here
     *
     * @param bool $replace_extra_symbols [optional] <p>Add some more replacements e.g. "£" with " pound ".</p>
     *
     * @psalm-pure
     *
     * @return array
     *               <p>The language maps, or an empty array if the maps are still unset after preparing them.</p>
     *
     * @psalm-return array<string, array<string , string>>
     */
    public static function charsArray(bool $replace_extra_symbols = false): array
    {
        if ($replace_extra_symbols) {
            // make sure the combined "maps + extras" table is loaded before reading it
            self::prepareAsciiAndExtrasMaps();

            return self::$ASCII_MAPS_AND_EXTRAS ?? [];
        }

        // make sure the plain maps are loaded before reading them
        self::prepareAsciiMaps();

        return self::$ASCII_MAPS ?? [];
    }
255
256
    /**
     * Returns a replacement array for ASCII methods with a mix of multiple languages.
     *
     * The merged "char => replacement" map of all languages is inverted, so that every
     * replacement points to the list of original chars that map to it.
     *
     * EXAMPLE: <code>
     * $array = ASCII::charsArrayWithMultiLanguageValues();
     * var_dump($array['b']); // ['β', 'б', 'ဗ', 'ბ', 'ب']
     * </code>
     *
     * @param bool $replace_extra_symbols [optional] <p>Add some more replacements e.g. "£" with " pound ".</p>
     *
     * @psalm-pure
     *
     * @return array
     *               <p>An array of replacements.</p>
     *
     * @psalm-return array<string, array<int, string>>
     */
    public static function charsArrayWithMultiLanguageValues(bool $replace_extra_symbols = false): array
    {
        /**
         * @var array<string, array>
         */
        static $CHARS_ARRAY = [];
        $cacheKey = '' . $replace_extra_symbols;

        // check static cache
        if (isset($CHARS_ARRAY[$cacheKey])) {
            return $CHARS_ARRAY[$cacheKey];
        }

        /** @var array<string, string> $language_all_chars */
        $language_all_chars = self::charsArrayWithSingleLanguageValues(
            $replace_extra_symbols,
            false
        );

        // invert the map: replacement => list of original chars
        // (iterate by value: nothing is modified, so a reference would only force
        // a copy of the array and leave a dangling reference behind)
        /** @var array<string, array<int, string>> $return */
        $return = [];
        foreach ($language_all_chars as $key => $value) {
            $return[$value][] = $key;
        }

        $CHARS_ARRAY[$cacheKey] = $return;

        return $return;
    }
309
310
    /**
     * Returns a replacement array for ASCII methods with one language.
     *
     * For example, German will map 'ä' to 'ae', while other languages
     * will simply return e.g. 'a'.
     *
     * EXAMPLE: <code>
     * $array = ASCII::charsArrayWithOneLanguage('ru');
     * $tmpKey = \array_search('yo', $array['replace']);
     * echo $array['orig'][$tmpKey]; // 'ё'
     * </code>
     *
     * @psalm-suppress InvalidNullableReturnType - we use the prepare* methods here, so we don't get NULL here
     *
     * @param string $language              [optional] <p>Language of the source string e.g.: en, de_at, or de-ch.
     *                                      (default is 'en') | ASCII::*_LANGUAGE_CODE</p>
     * @param bool   $replace_extra_symbols [optional] <p>Add some more replacements e.g. "£" with " pound ".</p>
     * @param bool   $asOrigReplaceArray    [optional] <p>TRUE === return {orig: string[], replace: string[]}
     *                                      array</p>
     *
     * @psalm-pure
     *
     * @return array
     *               <p>An array of replacements. For a language without a map the result is empty:
     *               {orig: [], replace: []} or [] depending on $asOrigReplaceArray.</p>
     *
     * @psalm-return array{orig: string[], replace: string[]}|array<string, string>
     */
    public static function charsArrayWithOneLanguage(
        string $language = self::ENGLISH_LANGUAGE_CODE,
        bool $replace_extra_symbols = false,
        bool $asOrigReplaceArray = true
    ): array {
        $language = self::get_language($language);

        // init
        /**
         * @var array<string, array>
         */
        static $CHARS_ARRAY = [];
        $cacheKey = '' . $replace_extra_symbols . '-' . $asOrigReplaceArray;

        // check static cache
        if (isset($CHARS_ARRAY[$cacheKey][$language])) {
            return $CHARS_ARRAY[$cacheKey][$language];
        }

        // pick the source table, both variants are processed the same way afterwards
        if ($replace_extra_symbols) {
            self::prepareAsciiAndExtrasMaps();
            $maps = self::$ASCII_MAPS_AND_EXTRAS;
        } else {
            self::prepareAsciiMaps();
            $maps = self::$ASCII_MAPS;
        }

        // unknown language (or no table at all) === empty map
        $map = $maps[$language] ?? [];

        $CHARS_ARRAY[$cacheKey][$language] = $asOrigReplaceArray
            ? ['orig' => \array_keys($map), 'replace' => \array_values($map)]
            : $map;

        return $CHARS_ARRAY[$cacheKey][$language];
    }
412
413
    /**
     * Returns a replacement array for ASCII methods with multiple languages.
     *
     * The maps of all languages are merged into one "char => replacement" map;
     * if several languages define the same char, the map that comes later wins.
     *
     * EXAMPLE: <code>
     * $array = ASCII::charsArrayWithSingleLanguageValues();
     * $tmpKey = \array_search('hnaik', $array['replace']);
     * echo $array['orig'][$tmpKey]; // '၌'
     * </code>
     *
     * @param bool $replace_extra_symbols [optional] <p>Add some more replacements e.g. "£" with " pound ".</p>
     * @param bool $asOrigReplaceArray    [optional] <p>TRUE === return {orig: string[], replace: string[]}
     *                                    array</p>
     *
     * @psalm-pure
     *
     * @return array
     *               <p>An array of replacements.</p>
     *
     * @psalm-return array{orig: string[], replace: string[]}|array<string, string>
     */
    public static function charsArrayWithSingleLanguageValues(
        bool $replace_extra_symbols = false,
        bool $asOrigReplaceArray = true
    ): array {
        // init
        /**
         * @var array<string,array>
         */
        static $CHARS_ARRAY = [];
        $cacheKey = '' . $replace_extra_symbols . '-' . $asOrigReplaceArray;

        // check static cache
        if (isset($CHARS_ARRAY[$cacheKey])) {
            return $CHARS_ARRAY[$cacheKey];
        }

        if ($replace_extra_symbols) {
            self::prepareAsciiAndExtrasMaps();
            $maps = self::$ASCII_MAPS_AND_EXTRAS ?? [];
        } else {
            self::prepareAsciiMaps();
            $maps = self::$ASCII_MAPS ?? [];
        }

        // array_values(): the maps are keyed by language, but "..." needs a plain list here;
        // the leading [] keeps array_merge() valid when there is no map at all
        $merged = \array_merge([], ...\array_values($maps));

        $CHARS_ARRAY[$cacheKey] = $asOrigReplaceArray
            ? ['orig' => \array_keys($merged), 'replace' => \array_values($merged)]
            : $merged;

        return $CHARS_ARRAY[$cacheKey];
    }
477
478
    /**
     * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
     *
     * @param string $str                         <p>The string to be sanitized.</p>
     * @param bool   $normalize_whitespace        [optional] <p>Set to true, if you need to normalize the
     *                                            whitespace.</p>
     * @param bool   $keep_non_breaking_space     [optional] <p>Set to true, to keep non-breaking-spaces, in
     *                                            combination with
     *                                            $normalize_whitespace</p>
     * @param bool   $normalize_msword            [optional] <p>Set to true, if you need to normalize MS Word chars
     *                                            e.g.: "…"
     *                                            => "..."</p>
     * @param bool   $remove_invisible_characters [optional] <p>Set to false, if you not want to remove invisible
     *                                            characters e.g.: "\0"</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A clean UTF-8 string.</p>
     */
    public static function clean(
        string $str,
        bool $normalize_whitespace = true,
        bool $keep_non_breaking_space = false,
        bool $normalize_msword = true,
        bool $remove_invisible_characters = true
    ): string {
        // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
        // caused connection reset problem on larger strings

        // Group 1 captures a run of well-formed byte sequences, groups 2 and 3 match a
        // single stray byte. Replacing every match with '$1' keeps the well-formed runs
        // and drops the stray bytes (group 1 is empty for them).
        $regex = '/
          (
            (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
            |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
            |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
            |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
            ){1,100}                      # ...one or more times
          )
        | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
        | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
        /x';
        $str = (string) \preg_replace($regex, '$1', $str);

        if ($normalize_whitespace) {
            $str = self::normalize_whitespace($str, $keep_non_breaking_space);
        }

        if ($normalize_msword) {
            $str = self::normalize_msword($str);
        }

        if ($remove_invisible_characters) {
            $str = self::remove_invisible_characters($str);
        }

        return $str;
    }
535
536
    /**
     * Checks if a string is 7 bit ASCII.
     *
     * The check is a whitelist (see self::$REGEX_ASCII): the printable chars
     * 0x20-0x7E plus the bytes 0x09, 0x0A, 0x0D, 0x10 and 0x13 are accepted.
     * Every other byte, including other control chars like "\0", makes the
     * string "not ASCII".
     *
     * EXAMPLE: <code>
     * ASCII::is_ascii('白'); // false
     * </code>
     *
     * @param string $str <p>The string to check.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>
     *              <strong>true</strong> if it is ASCII (an empty string is ASCII, too)<br>
     *              <strong>false</strong> otherwise
     *              </p>
     */
    public static function is_ascii(string $str): bool
    {
        if ($str === '') {
            return true;
        }

        // the regex matches the first byte that is NOT on the whitelist
        return !\preg_match('/' . self::$REGEX_ASCII . '/', $str);
    }
561
562
    /**
     * Returns a string with smart quotes, ellipsis characters, and dashes from
     * Windows-1252 (commonly used in Word documents) replaced by their ASCII
     * equivalents.
     *
     * The replacements are taken from the "msword" map
     * (ASCII::EXTRA_MSWORD_CHARS_LANGUAGE_CODE) and cached after the first call.
     *
     * EXAMPLE: <code>
     * ASCII::normalize_msword('„Abcdef…”'); // '"Abcdef..."'
     * </code>
     *
     * @param string $str <p>The string to be normalized.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with normalized characters for commonly used chars in Word documents.</p>
     */
    public static function normalize_msword(string $str): string
    {
        if ($str === '') {
            return '';
        }

        /**
         * @var array{orig: string[], replace: string[]}
         */
        static $MSWORD_CACHE = ['orig' => [], 'replace' => []];

        // build the search / replace lists only once
        if (empty($MSWORD_CACHE['orig'])) {
            self::prepareAsciiMaps();

            /**
             * @psalm-suppress PossiblyNullArrayAccess - we use the prepare* methods here, so we don't get NULL here
             *
             * @var array<string, string>
             */
            $map = self::$ASCII_MAPS[self::EXTRA_MSWORD_CHARS_LANGUAGE_CODE] ?? [];

            $MSWORD_CACHE = [
                'orig'    => \array_keys($map),
                'replace' => \array_values($map),
            ];
        }

        return \str_replace($MSWORD_CACHE['orig'], $MSWORD_CACHE['replace'], $str);
    }
607
608
    /**
     * Normalize the whitespace.
     *
     * Every char from the whitespace map (ASCII::EXTRA_WHITESPACE_CHARS_LANGUAGE_CODE)
     * is replaced by a regular space and, unless requested otherwise, the bidirectional
     * text control chars are removed.
     *
     * EXAMPLE: <code>
     * ASCII::normalize_whitespace("abc-\xc2\xa0-öäü-\xe2\x80\xaf-\xE2\x80\xAC", true); // "abc-\xc2\xa0-öäü- -"
     * </code>
     *
     * @param string $str                     <p>The string to be normalized.</p>
     * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
     * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
     *                                        bidirectional text chars.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with normalized whitespace.</p>
     */
    public static function normalize_whitespace(
        string $str,
        bool $keepNonBreakingSpace = false,
        bool $keepBidiUnicodeControls = false
    ): string {
        if ($str === '') {
            return '';
        }

        /**
         * list of whitespace chars to replace, one list per $keepNonBreakingSpace setting
         *
         * @var array<int,array<int,string>>
         */
        static $WHITESPACE_CACHE = [];
        $cacheKey = (int) $keepNonBreakingSpace;

        if (!isset($WHITESPACE_CACHE[$cacheKey])) {
            self::prepareAsciiMaps();

            $whitespaceMap = self::$ASCII_MAPS[self::EXTRA_WHITESPACE_CHARS_LANGUAGE_CODE] ?? [];

            if ($keepNonBreakingSpace) {
                // "\xc2\xa0" === non-breaking space
                unset($whitespaceMap["\xc2\xa0"]);
            }

            // only the chars are needed, all of them are replaced by a plain space
            $WHITESPACE_CACHE[$cacheKey] = \array_keys($whitespaceMap);
        }

        if (!$keepBidiUnicodeControls) {
            $str = \str_replace(self::$BIDI_UNI_CODE_CONTROLS_TABLE, '', $str);
        }

        return \str_replace($WHITESPACE_CACHE[$cacheKey], ' ', $str);
    }
668
669
    /**
     * Remove invisible characters from a string.
     *
     * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
     *
     * copy & paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
     *
     * @param string $str         <p>The input string.</p>
     * @param bool   $url_encoded [optional] <p>Set to true, to also remove the url encoded form
     *                            (e.g. "%00") of the control characters.</p>
     * @param string $replacement [optional] <p>The string that is inserted instead of the removed
     *                            characters.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function remove_invisible_characters(
        string $str,
        bool $url_encoded = false,
        string $replacement = ''
    ): string {
        // init
        $non_displayables = [];

        // every control character except:
        // - newline (dec 10),
        // - carriage return (dec 13),
        // - horizontal tab (dec 09)
        if ($url_encoded) {
            $non_displayables[] = '/%0[0-8bcefBCEF]/'; // url encoded 00-08, 11, 12, 14, 15
            $non_displayables[] = '/%1[0-9a-fA-F]/'; // url encoded 16-31
        }

        $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

        // Repeat until nothing is replaced anymore: removing a match can create a new one
        // (e.g. "%0%000" => "%00"). Also stop when a pass did not change the string, otherwise
        // a $replacement that is itself a removable char (e.g. "\x01") would loop forever.
        do {
            $previous = $str;
            $str = (string) \preg_replace($non_displayables, $replacement, $str, -1, $count);
        } while ($count !== 0 && $str !== $previous);

        return $str;
    }
709
710
    /**
711
     * Returns an ASCII version of the string. A set of non-ASCII characters are
712
     * replaced with their closest ASCII counterparts, and the rest are removed
713
     * by default. The language or locale of the source string can be supplied
714
     * for language-specific transliteration in any of the following formats:
715
     * en, en_GB, or en-GB. For example, passing "de" results in "äöü" mapping
716
     * to "aeoeue" rather than "aou" as in other languages.
717
     *
718
     * EXAMPLE: <code>
719
     * ASCII::to_ascii('�Düsseldorf�', 'en'); // Dusseldorf
720
     * </code>
721
     *
722
     * @param string    $str                       <p>The input string.</p>
723
     * @param string    $language                  [optional] <p>Language of the source string.
724
     *                                             (default is 'en') | ASCII::*_LANGUAGE_CODE</p>
725
     * @param bool      $remove_unsupported_chars  [optional] <p>Whether or not to remove the
726
     *                                             unsupported characters.</p>
727
     * @param bool      $replace_extra_symbols     [optional]  <p>Add some more replacements e.g. "£" with " pound
728
     *                                             ".</p>
729
     * @param bool      $use_transliterate         [optional]  <p>Use ASCII::to_transliterate() for unknown chars.</p>
730
     * @param bool|null $replace_single_chars_only [optional]  <p>Single char replacement is better for the
731
     *                                             performance, but some languages need to replace more then one char
732
     *                                             at the same time. | NULL === auto-setting, depended on the
733
     *                                             language</p>
734
     *
735
     * @psalm-pure
736
     *
737
     * @return string
738
     *                <p>A string that contains only ASCII characters.</p>
739
     */
740
    public static function to_ascii(
741
        string $str,
742
        string $language = self::ENGLISH_LANGUAGE_CODE,
743
        bool $remove_unsupported_chars = true,
744
        bool $replace_extra_symbols = false,
745
        bool $use_transliterate = false,
746
        bool $replace_single_chars_only = null
747
    ): string {
748
        if ($str === '') {
749
            return '';
750
        }
751
752
        $language = self::get_language($language);
753
754
        static $EXTRA_SYMBOLS_CACHE = null;
755
756
        /**
757
         * @var array<string,array<string,string>>
758
         */
759
        static $REPLACE_HELPER_CACHE = [];
760
        $cacheKey = $language . '-' . $replace_extra_symbols;
761
762
        if (!isset($REPLACE_HELPER_CACHE[$cacheKey])) {
763
            $langAll = self::charsArrayWithSingleLanguageValues($replace_extra_symbols, false);
764
765
            $langSpecific = self::charsArrayWithOneLanguage($language, $replace_extra_symbols, false);
766
767
            if ($langSpecific === []) {
768
                $REPLACE_HELPER_CACHE[$cacheKey] = $langAll;
769
            } else {
770
                $REPLACE_HELPER_CACHE[$cacheKey] = \array_merge([], $langAll, $langSpecific);
771
            }
772
        }
773
774
        if (
775
            $replace_extra_symbols
776
            &&
777
            $EXTRA_SYMBOLS_CACHE === null
778
        ) {
779
            $EXTRA_SYMBOLS_CACHE = [];
780
            foreach (self::$ASCII_EXTRAS ?? [] as $extrasLanguageTmp => $extrasDataTmp) {
781
                foreach ($extrasDataTmp as $extrasDataKeyTmp => $extrasDataValueTmp) {
782
                    $EXTRA_SYMBOLS_CACHE[$extrasDataKeyTmp] = $extrasDataKeyTmp;
783
                }
784
            }
785
            $EXTRA_SYMBOLS_CACHE = \implode('', $EXTRA_SYMBOLS_CACHE);
786
        }
787
788
        $charDone = [];
789
        if (\preg_match_all('/' . self::$REGEX_ASCII . ($replace_extra_symbols ? '|[' . $EXTRA_SYMBOLS_CACHE . ']' : '') . '/u', $str, $matches)) {
790
            if (!$replace_single_chars_only) {
791
                if (self::$LANGUAGE_MAX_KEY === null) {
792
                    /** @noinspection PsalmLocalImmutableInspection */
793
                    self::$LANGUAGE_MAX_KEY = self::getData('ascii_language_max_key');
794
                }
795
796
                $maxKeyLength = self::$LANGUAGE_MAX_KEY[$language] ?? 0;
797
798
                if ($maxKeyLength >= 5) {
799
                    foreach ($matches[0] as $keyTmp => $char) {
800
                        if (isset($matches[0][$keyTmp + 4])) {
801
                            $fiveChars = $matches[0][$keyTmp + 0] . $matches[0][$keyTmp + 1] . $matches[0][$keyTmp + 2] . $matches[0][$keyTmp + 3] . $matches[0][$keyTmp + 4];
802
                        } else {
803
                            $fiveChars = null;
804
                        }
805
                        if (
806
                            $fiveChars
807
                            &&
808
                            !isset($charDone[$fiveChars])
809
                            &&
810
                            isset($REPLACE_HELPER_CACHE[$cacheKey][$fiveChars])
811
                            &&
812
                            \strpos($str, $fiveChars) !== false
813
                        ) {
814
                            // DEBUG
815
                            //\var_dump($str, $fiveChars, $REPLACE_HELPER_CACHE[$cacheKey][$fiveChars]);
816
817
                            $charDone[$fiveChars] = true;
818
                            $str = \str_replace($fiveChars, $REPLACE_HELPER_CACHE[$cacheKey][$fiveChars], $str);
819
820
                            // DEBUG
821
                            //\var_dump($str, "\n");
822
                        }
823
                    }
824
                }
825
826
                if ($maxKeyLength >= 4) {
827
                    foreach ($matches[0] as $keyTmp => $char) {
828
                        if (isset($matches[0][$keyTmp + 3])) {
829
                            $fourChars = $matches[0][$keyTmp + 0] . $matches[0][$keyTmp + 1] . $matches[0][$keyTmp + 2] . $matches[0][$keyTmp + 3];
830
                        } else {
831
                            $fourChars = null;
832
                        }
833
                        if (
834
                            $fourChars
835
                            &&
836
                            !isset($charDone[$fourChars])
837
                            &&
838
                            isset($REPLACE_HELPER_CACHE[$cacheKey][$fourChars])
839
                            &&
840
                            \strpos($str, $fourChars) !== false
841
                        ) {
842
                            // DEBUG
843
                            //\var_dump($str, $fourChars, $REPLACE_HELPER_CACHE[$cacheKey][$fourChars]);
844
845
                            $charDone[$fourChars] = true;
846
                            $str = \str_replace($fourChars, $REPLACE_HELPER_CACHE[$cacheKey][$fourChars], $str);
847
848
                            // DEBUG
849
                            //\var_dump($str, "\n");
850
                        }
851
                    }
852
                }
853
854
                foreach ($matches[0] as $keyTmp => $char) {
855
                    if (isset($matches[0][$keyTmp + 2])) {
856
                        $threeChars = $matches[0][$keyTmp + 0] . $matches[0][$keyTmp + 1] . $matches[0][$keyTmp + 2];
857
                    } else {
858
                        $threeChars = null;
859
                    }
860
                    if (
861
                        $threeChars
862
                        &&
863
                        !isset($charDone[$threeChars])
864
                        &&
865
                        isset($REPLACE_HELPER_CACHE[$cacheKey][$threeChars])
866
                        &&
867
                        \strpos($str, $threeChars) !== false
868
                    ) {
869
                        // DEBUG
870
                        //\var_dump($str, $threeChars, $REPLACE_HELPER_CACHE[$cacheKey][$threeChars]);
871
872
                        $charDone[$threeChars] = true;
873
                        $str = \str_replace($threeChars, $REPLACE_HELPER_CACHE[$cacheKey][$threeChars], $str);
874
875
                        // DEBUG
876
                        //\var_dump($str, "\n");
877
                    }
878
                }
879
880
                foreach ($matches[0] as $keyTmp => $char) {
881
                    if (isset($matches[0][$keyTmp + 1])) {
882
                        $twoChars = $matches[0][$keyTmp + 0] . $matches[0][$keyTmp + 1];
883
                    } else {
884
                        $twoChars = null;
885
                    }
886
                    if (
887
                        $twoChars
888
                        &&
889
                        !isset($charDone[$twoChars])
890
                        &&
891
                        isset($REPLACE_HELPER_CACHE[$cacheKey][$twoChars])
892
                        &&
893
                        \strpos($str, $twoChars) !== false
894
                    ) {
895
                        // DEBUG
896
                        //\var_dump($str, $twoChars, $REPLACE_HELPER_CACHE[$cacheKey][$twoChars]);
897
898
                        $charDone[$twoChars] = true;
899
                        $str = \str_replace($twoChars, $REPLACE_HELPER_CACHE[$cacheKey][$twoChars], $str);
900
901
                        // DEBUG
902
                        //\var_dump($str, "\n");
903
                    }
904
                }
905
            }
906
907
            foreach ($matches[0] as $keyTmp => $char) {
908
                if (
909
                    !isset($charDone[$char])
910
                    &&
911
                    isset($REPLACE_HELPER_CACHE[$cacheKey][$char])
912
                    &&
913
                    \strpos($str, $char) !== false
914
                ) {
915
                    // DEBUG
916
                    //\var_dump($str, $char, $REPLACE_HELPER_CACHE[$cacheKey][$char]);
917
918
                    $charDone[$char] = true;
919
                    $str = \str_replace($char, $REPLACE_HELPER_CACHE[$cacheKey][$char], $str);
920
921
                    // DEBUG
922
                    //\var_dump($str, "\n");
923
                }
924
            }
925
        }
926
927
        /** @psalm-suppress PossiblyNullOperand - we use the prepare* methods here, so we don't get NULL here */
928
        if (!isset(self::$ASCII_MAPS[$language])) {
929
            $use_transliterate = true;
930
        }
931
932
        if ($use_transliterate) {
933
            /** @noinspection ArgumentEqualsDefaultValueInspection */
934
            $str = self::to_transliterate($str, null, false);
935
        }
936
937
        if ($remove_unsupported_chars) {
938
            $str = (string) \str_replace(["\n\r", "\n", "\r", "\t"], ' ', $str);
939
            $str = (string) \preg_replace('/' . self::$REGEX_ASCII . '/', '', $str);
940
        }
941
942
        return $str;
943
    }
944
945
    /**
     * Convert given string to safe filename (and keep string case).
     *
     * Processing order:
     * 1) optionally transliterate to ASCII (unknown chars become $fallback_char),
     * 2) remove every char that is not a letter, digit, dot, hyphen, whitespace or $fallback_char,
     * 3) replace each whitespace run with $fallback_char,
     * 4) collapse runs of $fallback_char and trim it from both ends.
     *
     * EXAMPLE: <code>
     * ASCII::to_filename('שדגשדג.png', true); // 'shdgshdg.png'
     * </code>
     *
     * @param string $str
     * @param bool   $use_transliterate <p>ASCII::to_transliterate() is used by default - unsafe characters are
     *                                  simply removed otherwise.</p>
     * @param string $fallback_char     <p>Replacement for whitespace and (when transliterating) for unknown
     *                                  chars. An empty string is allowed and simply drops those chars.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string that contains only safe characters for a filename.</p>
     */
    public static function to_filename(
        string $str,
        bool $use_transliterate = true,
        string $fallback_char = '-'
    ): string {
        if ($use_transliterate) {
            $str = self::to_transliterate($str, $fallback_char);
        }

        $fallback_char_escaped = \preg_quote($fallback_char, '/');

        $patterns = [
            '/[^' . $fallback_char_escaped . '.\\-a-zA-Z0-9\\s]/', // 1) remove un-needed chars
            '/[\\s]+/u',                                           // 2) convert spaces to $fallback_char
        ];
        $substitutes = [
            '',
            $fallback_char,
        ];

        // An empty $fallback_char would produce the invalid pattern "/[]+/u"; preg_replace()
        // would then return NULL and the whole filename would be lost, so skip step 3 in that case.
        if ($fallback_char !== '') {
            $patterns[] = '/[' . $fallback_char_escaped . ']+/u'; // 3) remove double $fallback_char's
            $substitutes[] = $fallback_char;
        }

        $str = (string) \preg_replace($patterns, $substitutes, $str);

        return \trim($str, $fallback_char);
    }
989
990
    /**
     * Converts the string into an URL slug. This includes replacing non-ASCII
     * characters with their closest ASCII equivalents, removing remaining
     * non-ASCII and non-alphanumeric characters, and replacing whitespace with
     * $separator. The separator defaults to a single dash, and the string
     * is also converted to lowercase. The language of the source string can
     * also be supplied for language-specific transliteration.
     *
     * @param string                $str
     * @param string                $separator             [optional] <p>The string used to replace whitespace.</p>
     * @param string                $language              [optional] <p>Language of the source string.
     *                                                     (default is 'en') | ASCII::*_LANGUAGE_CODE</p>
     * @param array<string, string> $replacements          [optional] <p>A map of replaceable strings, applied
     *                                                     before any other processing. Numeric keys are
     *                                                     handled as strings.</p>
     * @param bool                  $replace_extra_symbols [optional] <p>Add some more replacements e.g. "£" with "
     *                                                     pound ".</p>
     * @param bool                  $use_str_to_lower      [optional] <p>Use "string to lower" for the input.</p>
     * @param bool                  $use_transliterate     [optional] <p>Use ASCII::to_transliterate() for unknown
     *                                                     chars.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string that has been converted to an URL slug.</p>
     */
    public static function to_slugify(
        string $str,
        string $separator = '-',
        string $language = self::ENGLISH_LANGUAGE_CODE,
        array $replacements = [],
        bool $replace_extra_symbols = false,
        bool $use_str_to_lower = true,
        bool $use_transliterate = false
    ): string {
        if ($str === '') {
            return '';
        }

        foreach ($replacements as $from => $to) {
            // PHP silently converts numeric-string array keys (e.g. '1') into integers; cast them
            // back, otherwise str_replace() throws a TypeError because of "strict_types=1".
            $str = \str_replace((string) $from, $to, $str);
        }

        $str = self::to_ascii(
            $str,
            $language,
            false,
            $replace_extra_symbols,
            $use_transliterate
        );

        $str = \str_replace('@', $separator, $str);

        // keep only letters, digits, whitespace, "-", "_" and the separator itself
        $str = (string) \preg_replace(
            '/[^a-zA-Z\\d\\s\\-_' . \preg_quote($separator, '/') . ']/',
            '',
            $str
        );

        if ($use_str_to_lower) {
            $str = \strtolower($str);
        }

        // strip leading / trailing apostrophes and whitespace
        $str = (string) \preg_replace('/^[\'\\s]+|[\'\\s]+$/', '', $str);
        // split "camelCase" words (only has an effect if the string was not lower-cased above)
        $str = (string) \preg_replace('/\\B([A-Z])/', '-\1', $str);
        // collapse every run of "-", "_" and whitespace into one separator
        $str = (string) \preg_replace('/[\\-_\\s]+/', $separator, $str);

        // remove one leading and one trailing separator (nothing to do for an empty separator)
        $l = \strlen($separator);
        if ($l > 0) {
            if (\strpos($str, $separator) === 0) {
                $str = (string) \substr($str, $l);
            }

            if (\substr($str, -$l) === $separator) {
                $str = (string) \substr($str, 0, \strlen($str) - $l);
            }
        }

        return $str;
    }
1065
1066
    /**
     * Returns an ASCII version of the string. A set of non-ASCII characters are
     * replaced with their closest ASCII counterparts, and the rest are replaced
     * with $unknown (or kept as they are if $unknown is NULL).
     *
     * EXAMPLE: <code>
     * ASCII::to_transliterate('déjà σσς iıii'); // 'deja sss iiii'
     * </code>
     *
     * @param string      $str     <p>The input string.</p>
     * @param string|null $unknown [optional] <p>Character used if a character is unknown. (default is '?')
     *                             But you can also use NULL to keep the unknown chars.</p>
     * @param bool        $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl first,
     *                             if the "intl" extension is loaded.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string that contains only ASCII characters, unless $unknown is NULL
     *                and the input contains chars without a known replacement.</p>
     *
     * @noinspection ParameterDefaultValueIsNotNullInspection
     */
    public static function to_transliterate(
        string $str,
        $unknown = '?',
        bool $strict = false
    ): string {
        /**
         * Lazily loaded replacement tables, indexed by "bank" (code point >> 8).
         *
         * @var array<int,array<mixed>>|null
         */
        static $UTF8_TO_TRANSLIT = null;

        /**
         * @var \Transliterator|null
         */
        static $TRANSLITERATOR = null;

        /**
         * @var bool|null
         */
        static $SUPPORT_INTL = null;

        if ($str === '') {
            return '';
        }

        if ($SUPPORT_INTL === null) {
            $SUPPORT_INTL = \extension_loaded('intl');
        }

        // check if we only have ASCII, first (better performance)
        $str_tmp = $str;
        if (self::is_ascii($str)) {
            return $str;
        }

        $str = self::clean($str);

        // check again, if we only have ASCII, now ...
        if ($str_tmp !== $str && self::is_ascii($str)) {
            return $str;
        }

        if ($strict && $SUPPORT_INTL === true) {
            if (!isset($TRANSLITERATOR)) {
                // INFO: see "*-Latin" rules via "transliterator_list_ids()"
                $TRANSLITERATOR = \transliterator_create('NFKC; [:Nonspacing Mark:] Remove; NFKC; Any-Latin; Latin-ASCII;');
            }

            // transliterator_create() returns NULL on failure, so only use a real instance
            if ($TRANSLITERATOR instanceof \Transliterator) {
                // INFO: https://unicode.org/cldr/utility/character.jsp
                $str_tmp = \transliterator_transliterate($TRANSLITERATOR, $str);

                if ($str_tmp !== false) {
                    // check again, if we only have ASCII, now ...
                    if ($str_tmp !== $str && self::is_ascii($str_tmp)) {
                        return $str_tmp;
                    }

                    /** @noinspection CallableParameterUseCaseInTypeContextInspection */
                    $str = $str_tmp;
                }
            }
        }

        if (self::$ORD === null) {
            /** @noinspection PsalmLocalImmutableInspection */
            self::$ORD = self::getData('ascii_ord');
        }

        // split the string into single UTF-8 chars
        \preg_match_all('/.|[^\x00]$/us', $str, $array_tmp);

        $str_tmp = '';
        foreach ($array_tmp[0] ?? [] as $c) {
            $ordC0 = self::$ORD[$c[0]];

            // ASCII - next please
            if ($ordC0 >= 0 && $ordC0 <= 127) {
                $str_tmp .= $c;

                continue;
            }

            // decode the code point of this char; reset for every char, so that a value
            // from a previous iteration can never be re-used by accident
            $ord = null;

            $ordC1 = self::$ORD[$c[1]];

            // 2-byte sequence
            if ($ordC0 >= 192 && $ordC0 <= 223) {
                $ord = ($ordC0 - 192) * 64 + ($ordC1 - 128);
            }

            if ($ordC0 >= 224) {
                $ordC2 = self::$ORD[$c[2]];

                // 3-byte sequence
                if ($ordC0 <= 239) {
                    $ord = ($ordC0 - 224) * 4096 + ($ordC1 - 128) * 64 + ($ordC2 - 128);
                }

                if ($ordC0 >= 240) {
                    $ordC3 = self::$ORD[$c[3]];

                    // 4-byte sequence
                    if ($ordC0 <= 247) {
                        $ord = ($ordC0 - 240) * 262144 + ($ordC1 - 128) * 4096 + ($ordC2 - 128) * 64 + ($ordC3 - 128);
                    }

                    // We only process valid UTF-8 chars (<= 4 byte), so 5 and 6 byte
                    // sequences are not decoded here: $ord stays NULL for them.
                }
            }

            if (
                $ordC0 === 254
                ||
                $ordC0 === 255
                ||
                $ord === null
            ) {
                $str_tmp .= $unknown ?? $c;

                continue;
            }

            // the replacement tables are split into files of 256 code points each
            $bank = $ord >> 8;
            if (!isset($UTF8_TO_TRANSLIT[$bank])) {
                $UTF8_TO_TRANSLIT[$bank] = self::getDataIfExists(\sprintf('x%03x', $bank));
            }

            $index = $ord & 255;

            // no replacement known for this char
            if (!isset($UTF8_TO_TRANSLIT[$bank][$index])) {
                $str_tmp .= $unknown ?? $c;

                continue;
            }

            $new_char = $UTF8_TO_TRANSLIT[$bank][$index];

            if ($unknown === null && $new_char === '') {
                // empty replacement, but unknown chars should be kept: keep the original char
                $str_tmp .= $c;
            } elseif (
                $new_char === '[?]'
                ||
                $new_char === '[?] '
            ) {
                // the table marks this char explicitly as unknown
                $str_tmp .= $unknown ?? $c;
            } else {
                $str_tmp .= $new_char;
            }
        }

        return $str_tmp;
    }
1291
1292
    /**
     * Get the language from a string.
     *
     * The result is lower-cased and uses "_" as separator. A region that only
     * repeats the language code is dropped; any other region is kept.
     *
     * e.g.: de_at -> de_at
     *       de_DE -> de
     *       DE_DE -> de
     *       de-de -> de
     *       en_NZ -> en_nz
     *
     * @param string $language
     *
     * @psalm-pure
     *
     * @return string
     */
    private static function get_language(string $language): string
    {
        if ($language === '') {
            return '';
        }

        if (
            \strpos($language, '_') === false
            &&
            \strpos($language, '-') === false
        ) {
            return \strtolower($language);
        }

        // The pattern must be anchored at the start and the repeated part must not be followed
        // by another letter, otherwise single letters are collapsed in the middle of the
        // string, e.g. "en_NZ" would become "enz" and "nn_NO" would become "nno".
        $regex = '/^(?<first>[a-z]+)[\-_]\g{first}(?![a-z])/i';

        return \str_replace(
            '-',
            '_',
            \strtolower(
                (string) \preg_replace($regex, '$1', $language)
            )
        );
    }
1332
1333
    /**
     * Include "/data/{$file}.php" (relative to this class file) and return
     * whatever that file returns.
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     *
     * @param string $file <p>File name without directory and without the ".php" extension.</p>
     *
     * @psalm-pure
     *
     * @return array<mixed>
     */
    private static function getData(string $file)
    {
        $path = __DIR__ . '/data/' . $file . '.php';

        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        /** @psalm-suppress UnresolvableInclude */
        return include $path;
    }
1351
1352
    /**
     * Include "/data/{$file}.php" (relative to this class file) if that file
     * exists, otherwise fall back to an empty array.
     *
     * @param string $file <p>File name without directory and without the ".php" extension.</p>
     *
     * @psalm-pure
     *
     * @return array<mixed>
     */
    private static function getDataIfExists(string $file): array
    {
        $path = __DIR__ . '/data/' . $file . '.php';

        /**
         * @noinspection LowPerformingFilesystemOperationsInspection
         * -> we use this only once, so no extra caching is needed
         */
        if (!\file_exists($path)) {
            return [];
        }

        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        return include $path;
    }
1376
1377
    /**
     * Build the combined "maps + extras" table once and cache it in
     * self::$ASCII_MAPS_AND_EXTRAS; later calls return immediately.
     *
     * @psalm-pure
     *
     * @return void
     */
    private static function prepareAsciiAndExtrasMaps()
    {
        // already built
        if (self::$ASCII_MAPS_AND_EXTRAS !== null) {
            return;
        }

        // make sure both source tables are loaded
        self::prepareAsciiMaps();
        self::prepareAsciiExtras();

        /** @psalm-suppress PossiblyNullArgument - we use the prepare* methods here, so we don't get NULL here */
        $maps = self::$ASCII_MAPS ?? [];
        $extras = self::$ASCII_EXTRAS ?? [];

        // NOTE(review): array_merge_recursive() turns a string key that exists in both tables
        // (same language AND same char) into an array of both values - presumably the data
        // files never overlap like that; verify against "/data/*.php".
        /** @noinspection PsalmLocalImmutableInspection */
        self::$ASCII_MAPS_AND_EXTRAS = \array_merge_recursive($maps, $extras);
    }
1396
1397
    /**
     * Load the "ascii_by_languages" data file into self::$ASCII_MAPS,
     * unless it was already loaded.
     *
     * @psalm-pure
     *
     * @return void
     */
    private static function prepareAsciiMaps()
    {
        // already loaded
        if (self::$ASCII_MAPS !== null) {
            return;
        }

        /** @noinspection PsalmLocalImmutableInspection */
        self::$ASCII_MAPS = self::getData('ascii_by_languages');
    }
1409
1410
    /**
     * Load the "ascii_extras_by_languages" data file into self::$ASCII_EXTRAS,
     * unless it was already loaded.
     *
     * @psalm-pure
     *
     * @return void
     */
    private static function prepareAsciiExtras()
    {
        // already loaded
        if (self::$ASCII_EXTRAS !== null) {
            return;
        }

        /** @noinspection PsalmLocalImmutableInspection */
        self::$ASCII_EXTRAS = self::getData('ascii_extras_by_languages');
    }
1422
}
1423