Passed
Push — 2.0 ( 401112...fa4b31 )
by Greg
13:39
created

I18N::comparator()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 10
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 2
eloc 5
nc 2
nop 0
dl 0
loc 10
rs 10
c 0
b 0
f 0
1
<?php
2
3
/**
4
 * webtrees: online genealogy
5
 * Copyright (C) 2021 webtrees development team
6
 * This program is free software: you can redistribute it and/or modify
7
 * it under the terms of the GNU General Public License as published by
8
 * the Free Software Foundation, either version 3 of the License, or
9
 * (at your option) any later version.
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
 * GNU General Public License for more details.
14
 * You should have received a copy of the GNU General Public License
15
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
16
 */
17
18
declare(strict_types=1);
19
20
namespace Fisharebest\Webtrees;
21
22
use Closure;
23
use Collator;
24
use Exception;
25
use Fisharebest\Localization\Locale;
26
use Fisharebest\Localization\Locale\LocaleEnUs;
27
use Fisharebest\Localization\Locale\LocaleInterface;
28
use Fisharebest\Localization\Translation;
29
use Fisharebest\Localization\Translator;
30
use Fisharebest\Webtrees\Module\ModuleCustomInterface;
31
use Fisharebest\Webtrees\Module\ModuleLanguageInterface;
32
use Fisharebest\Webtrees\Services\ModuleService;
33
34
use function array_merge;
35
use function class_exists;
36
use function html_entity_decode;
37
use function in_array;
38
use function mb_strtolower;
39
use function mb_strtoupper;
40
use function mb_substr;
41
use function ord;
42
use function sprintf;
43
use function str_contains;
44
use function str_replace;
45
use function strcmp;
46
use function strip_tags;
47
use function strlen;
48
use function strtr;
49
use function var_export;
50
51
/**
52
 * Internationalization (i18n) and localization (l10n).
53
 */
54
class I18N
55
{
56
    /** Special character used by MO files for plurals. */
    public const PLURAL = "\x00";

    /** Special character used by MO files for context. */
    public const CONTEXT = "\x04";

    /** Digits are always rendered LTR, even in RTL text. */
    private const DIGITS = '0123456789٠١٢٣٤٥٦٧٨٩۰۱۲۳۴۵۶۷۸۹';

    /** Language codes that need special handling for the dotless letter I. */
    private const DOTLESS_I_LOCALES = ['az', 'tr'];

    /** Replacements applied before lower-casing text in a dotless-I language. */
    private const DOTLESS_I_TOLOWER = ['I' => 'ı', 'İ' => 'i'];

    /** Replacements applied before upper-casing text in a dotless-I language. */
    private const DOTLESS_I_TOUPPER = ['ı' => 'I', 'i' => 'İ'];
78
79
    /**
     * The ranges of characters used by each script.
     *
     * Each entry is [script code, first code point, last code point].
     * The list is searched in order, so where ranges overlap the earlier entry wins.
     */
    private const SCRIPT_CHARACTER_RANGES = [
        ['Latn', 0x0041, 0x005A],
        ['Latn', 0x0061, 0x007A],
        ['Latn', 0x0100, 0x02AF],
        ['Grek', 0x0370, 0x03FF],
        ['Cyrl', 0x0400, 0x052F],
        ['Hebr', 0x0590, 0x05FF],
        ['Arab', 0x0600, 0x06FF],
        ['Arab', 0x0750, 0x077F],
        ['Arab', 0x08A0, 0x08FF],
        ['Deva', 0x0900, 0x097F],
        ['Taml', 0x0B80, 0x0BFF],
        ['Sinh', 0x0D80, 0x0DFF],
        ['Thai', 0x0E00, 0x0E7F],
        ['Geor', 0x10A0, 0x10FF],
        ['Grek', 0x1F00, 0x1FFF],
        ['Deva', 0xA8E0, 0xA8FF],
        ['Hans', 0x3000, 0x303F], // Mixed CJK, not just Hans
        ['Hans', 0x3400, 0xFAFF], // Mixed CJK, not just Hans
        ['Hans', 0x20000, 0x2FA1F], // Mixed CJK, not just Hans
    ];
180
181
    /**
     * Characters that are displayed in mirror form in RTL text.
     *
     * Used as a strtr() replacement table, so every key must be exactly the
     * character to be mirrored. A key with trailing whitespace would only match
     * when the character is followed by a space, and would then swallow that space.
     */
    private const MIRROR_CHARACTERS = [
        '(' => ')',
        ')' => '(',
        '[' => ']',
        ']' => '[',
        '{' => '}',
        '}' => '{',
        '<' => '>',
        '>' => '<',
        '‹' => '›',
        '›' => '‹',
        '«' => '»',
        '»' => '«',
        '﴾' => '﴿',
        '﴿' => '﴾',
        '“' => '”',
        '”' => '“',
        '‘' => '’',
        '’' => '‘',
    ];
202
    /** @var Collator|null From the php-intl library; assigned by init() when the Collator class is usable */
    private static $collator;

    /** @var Translator An object that performs translation; assigned by init() */
    private static $translator;

    /** @var LocaleInterface The current locale (e.g. LocaleEnGb); assigned by init() */
    private static $locale;

    /** @var ModuleLanguageInterface|null The language module found by init(); not assigned during setup */
    private static $language;

    /** @var string Punctuation used to separate list items, typically a comma */
    public static $list_separator;
216
217
    /**
     * The locales of the language modules reported by the module service.
     * When there are none, a list containing only US English is returned.
     *
     * @return array<LocaleInterface>
     */
    public static function activeLocales(): array
    {
        $to_locale = static function (ModuleLanguageInterface $module): LocaleInterface {
            return $module->locale();
        };

        $locales = app(ModuleService::class)
            ->findByInterface(ModuleLanguageInterface::class, false, true)
            ->map($to_locale);

        return $locales->isEmpty() ? [new LocaleEnUs()] : $locales->all();
    }
236
237
    /**
     * The MySQL collation to use for the current locale.
     *
     * The locale's own collation is prefixed with "utf8_", except for a few
     * collations which are replaced by the generic "utf8_unicode_ci".
     *
     * @return string
     */
    public static function collation(): string
    {
        // Only available in MySQL 5.6
        $replaced_by_generic = [
            'croatian_ci',
            'german2_ci',
            'vietnamese_ci',
        ];

        $collation = self::$locale->collation();

        if (in_array($collation, $replaced_by_generic, true)) {
            return 'utf8_unicode_ci';
        }

        return 'utf8_' . $collation;
    }
255
256
    /**
     * The translated format string used to display full dates in the current locale.
     *
     * @return string
     */
    public static function dateFormat(): string
    {
        /* I18N: This is the format string for full dates. See https://php.net/date for codes */
        $format = self::$translator->translate('%j %F %Y');

        return $format;
    }
266
267
    /**
     * Convert the digits 0-9 into the local script.
     * Used for years, etc., where we do not want thousands-separators, decimals, etc.
     *
     * @param string|int $n The value is cast to a string before conversion
     *
     * @return string
     */
    public static function digits($n): string
    {
        $text = (string) $n;

        return self::$locale->digits($text);
    }
279
280
    /**
     * The text direction of the current locale.
     *
     * @return string "ltr" or "rtl"
     */
    public static function direction(): string
    {
        $direction = self::$locale->direction();

        return $direction;
    }
289
290
    /**
     * Initialise the translation adapter with a locale setting.
     *
     * Sets the current locale, loads its translations (building the PHP
     * translation file from the PO file when it cannot be loaded), merges in
     * translations from custom modules, then creates the translator, the list
     * separator and, where possible, a collator.
     *
     * @param string $code  The locale code to activate
     * @param bool   $setup True during setup, when there is no database/modules
     *
     * @return void
     */
    public static function init(string $code, bool $setup = false): void
    {
        self::$locale = Locale::create($code);
        $language_tag = self::$locale->languageTag();

        // Load the translation file
        $translation_file = __DIR__ . '/../resources/lang/' . $language_tag . '/messages.php';

        try {
            $translations = (new Translation($translation_file))->asArray();
        } catch (Exception $ex) {
            // The translations files are created during the build process, and are
            // not included in the source code.
            // Assuming we are using dev code, and build (or rebuild) the files.
            $po_file      = Webtrees::ROOT_DIR . 'resources/lang/' . $language_tag . '/messages.po';
            $translations = (new Translation($po_file))->asArray();
            $php_source   = "<?php\n\nreturn " . var_export($translations, true) . ";\n";
            file_put_contents($translation_file, $php_source);
        }

        // Add translations from custom modules (but not during setup, as we have no database/modules)
        if (!$setup) {
            $module_service = app(ModuleService::class);

            $merge_custom = static function (array $carry, ModuleCustomInterface $item) use ($language_tag): array {
                return array_merge($carry, $item->customTranslations($language_tag));
            };

            $translations = $module_service
                ->findByInterface(ModuleCustomInterface::class)
                ->reduce($merge_custom, $translations);

            $has_this_code = static function (ModuleLanguageInterface $module) use ($code): bool {
                return $module->locale()->languageTag() === $code;
            };

            self::$language = $module_service
                ->findByInterface(ModuleLanguageInterface::class)
                ->first($has_this_code);
        }

        // Create a translator
        self::$translator = new Translator($translations, self::$locale->pluralRule());

        /* I18N: This punctuation is used to separate lists of items */
        self::$list_separator = self::translate(', ');

        // Create a collator
        try {
            // Symfony provides a very incomplete polyfill - which cannot be used.
            if (class_exists('Collator')) {
                $collator = new Collator(self::$locale->code());
                // Ignore upper/lower case differences
                $collator->setStrength(Collator::SECONDARY);
                self::$collator = $collator;
            }
        } catch (Exception $ex) {
            // PHP-INTL is not installed?  We'll use a fallback later.
            self::$collator = null;
        }
    }
354
355
    /**
     * Translate a message, then substitute placeholders using sprintf().
     * echo I18N::translate('Hello World!');
     * echo I18N::translate('The %s sat on the mat', 'cat');
     *
     * @param string $message The text to translate
     * @param string ...$args Values for the placeholders in the translated text
     *
     * @return string
     */
    public static function translate(string $message, ...$args): string
    {
        $translated = self::$translator->translate($message);

        return sprintf($translated, ...$args);
    }
371
372
    /**
     * The language tag of the current locale.
     *
     * @return string
     */
    public static function languageTag(): string
    {
        $language_tag = self::$locale->languageTag();

        return $language_tag;
    }
379
380
    /**
     * The current locale, as set by init().
     *
     * @return LocaleInterface
     */
    public static function locale(): LocaleInterface
    {
        $locale = self::$locale;

        return $locale;
    }
387
388
    /**
     * The language module selected by init().
     *
     * NOTE(review): init() does not assign a language module when called in
     * setup mode, and the lookup may find nothing. The declared return type
     * rejects null, so this would fail in those cases - confirm that callers
     * never reach here in those situations.
     *
     * @return ModuleLanguageInterface
     */
    public static function language(): ModuleLanguageInterface
    {
        $language = self::$language;

        return $language;
    }
395
396
    /**
     * Translate a number into the local representation.
     * e.g. 12345.67 becomes
     * en: 12,345.67
     * fr: 12 345,67
     * de: 12.345,67
     *
     * @param float $n         The number to format
     * @param int   $precision The number of decimal places to round to
     *
     * @return string
     */
    public static function number(float $n, int $precision = 0): string
    {
        $rounded = round($n, $precision);

        return self::$locale->number($rounded);
    }
412
413
    /**
     * Translate a fraction into a percentage.
     * e.g. 0.123 becomes
     * en: 12.3%
     * fr: 12,3 %
     * de: 12,3%
     *
     * @param float $n         The fraction to format
     * @param int   $precision The fraction is rounded to $precision + 2 decimal places
     *
     * @return string
     */
    public static function percentage(float $n, int $precision = 0): string
    {
        $decimals = $precision + 2;
        $rounded  = round($n, $decimals);

        return self::$locale->percent($rounded);
    }
429
430
    /**
     * Translate a plural string, then substitute placeholders using sprintf().
     * echo self::plural('There is an error', 'There are errors', $num_errors);
     * echo self::plural('There is one error', 'There are %s errors', $num_errors);
     * echo self::plural('There is %1$s %2$s cat', 'There are %1$s %2$s cats', $num, $num, $colour);
     *
     * @param string $singular The singular form of the text
     * @param string $plural   The plural form of the text
     * @param int    $count    The quantity that selects the form
     * @param string ...$args  Values for the placeholders in the translated text
     *
     * @return string
     */
    public static function plural(string $singular, string $plural, int $count, ...$args): string
    {
        $translated = self::$translator->translatePlural($singular, $plural, $count);

        return sprintf($translated, ...$args);
    }
449
450
    /**
     * UTF8 version of PHP::strrev()
     * Reverse RTL text for third-party libraries such as GD2 and googlechart.
     * These do not support UTF8 text direction, so we must mimic it for them.
     *
     * HTML tags and entities are removed first. Text in an LTR script is then
     * returned without reversing. Otherwise mirrored characters (such as
     * parentheses) are swapped and the characters are reversed, while each run
     * of digits keeps its original left-to-right order.
     *
     * @param string $text Text to be reversed
     *
     * @return string
     */
    public static function reverseText(string $text): string
    {
        // Remove HTML markup - we can't display it and it is LTR.
        // Then remove HTML entities.
        $text = html_entity_decode(strip_tags($text), ENT_QUOTES, 'UTF-8');

        // LTR text doesn't need reversing
        $script = self::textScript($text);

        if (self::scriptDirection($script) === 'ltr') {
            return $text;
        }

        // Mirrored characters
        $remaining = strtr($text, self::MIRROR_CHARACTERS);

        $output      = '';
        $digit_group = '';

        // Consume one character at a time from the front of the text.
        for (; $remaining !== ''; $remaining = mb_substr($remaining, 1)) {
            $character = mb_substr($remaining, 0, 1);

            if (!str_contains(self::DIGITS, $character)) {
                // Any pending digits stay in reading order, after this character.
                $output      = $character . $digit_group . $output;
                $digit_group = '';
                continue;
            }

            $digit_group .= $character;
        }

        return $digit_group . $output;
    }
491
492
    /**
     * Return the direction (ltr or rtl) for a given script.
     * The PHP/intl library does not provide this information, so we need
     * our own lookup table.
     *
     * @param string $script A script code, such as "Arab" or "Latn"
     *
     * @return string "rtl" for the listed scripts, otherwise "ltr"
     */
    public static function scriptDirection(string $script): string
    {
        $rtl_scripts = ['Arab', 'Hebr', 'Mong', 'Thaa'];

        return in_array($script, $rtl_scripts, true) ? 'rtl' : 'ltr';
    }
513
514
    /**
     * Identify the script used for a piece of text.
     *
     * HTML tags, HTML entities and the Individual::NOMEN_NESCIO /
     * Individual::PRAENOMEN_NESCIO placeholders are removed, then the text is
     * decoded one UTF-8 character at a time. The script of the first character
     * that falls inside one of SCRIPT_CHARACTER_RANGES is returned. Characters
     * outside every range (punctuation, spacing, etc.) are skipped.
     *
     * @param string $string
     *
     * @return string A script code such as "Latn" or "Hebr". "Latn" is also returned for
     *                empty text, text with no recognised characters, and malformed or
     *                truncated UTF-8.
     */
    public static function textScript(string $string): string
    {
        $string = strip_tags($string); // otherwise HTML tags show up as latin
        $string = html_entity_decode($string, ENT_QUOTES, 'UTF-8'); // otherwise HTML entities show up as latin
        $string = str_replace([
            Individual::NOMEN_NESCIO,
            Individual::PRAENOMEN_NESCIO,
        ], '', $string);

        $pos    = 0;
        $strlen = strlen($string);

        while ($pos < $strlen) {
            // The lead byte gives the length of the sequence and the high bits of the code point.
            $byte1 = ord($string[$pos]);

            if ($byte1 < 0x80) {
                $chrlen     = 1;
                $code_point = $byte1;
            } elseif ($byte1 < 0xC0) {
                // Invalid continuation character
                return 'Latn';
            } elseif ($byte1 < 0xE0) {
                $chrlen     = 2;
                $code_point = $byte1 & 0x1F;
            } elseif ($byte1 < 0xF0) {
                $chrlen     = 3;
                $code_point = $byte1 & 0x0F;
            } elseif ($byte1 < 0xF8) {
                $chrlen     = 4;
                $code_point = $byte1 & 0x07;
            } else {
                // Invalid UTF
                return 'Latn';
            }

            if ($pos + $chrlen > $strlen) {
                // Truncated sequence - treat it like any other invalid UTF, rather than reading past the end.
                return 'Latn';
            }

            // Each continuation byte contributes its low six bits.
            for ($offset = 1; $offset < $chrlen; $offset++) {
                $code_point = ($code_point << 6) | (ord($string[$pos + $offset]) & 0x3F);
            }

            foreach (self::SCRIPT_CHARACTER_RANGES as $range) {
                if ($code_point >= $range[1] && $code_point <= $range[2]) {
                    return $range[0];
                }
            }

            // Not a recognised script. Maybe punctuation, spacing, etc. Keep looking.
            // Advance by the whole sequence, so that the next iteration starts on a lead byte.
            $pos += $chrlen;
        }

        return 'Latn';
    }
565
566
    /**
     * Perform a case-insensitive comparison of two strings.
     *
     * When a collator is available, it is used for the comparison (init()
     * configures it to ignore upper/lower case differences). Otherwise the
     * lower-case forms of the strings are compared byte-by-byte.
     *
     * @param string $string1
     * @param string $string2
     *
     * @return int Less than, equal to, or greater than zero
     */
    public static function strcasecmp(string $string1, string $string2): int
    {
        if (self::$collator instanceof Collator) {
            // Collator::compare() can return false on failure, which would violate the
            // declared return type. Cast to int, as comparator() does.
            return (int) self::$collator->compare($string1, $string2);
        }

        return strcmp(self::strtolower($string1), self::strtolower($string2));
    }
582
583
    /**
     * A closure which will compare strings using local collation rules.
     *
     * When a collator is available, the closure compares with it. Otherwise the
     * closure compares the lower-case forms of the strings byte-by-byte.
     *
     * @return Closure A function (string $x, string $y): int, suitable for usort() etc.
     */
    public static function comparator(): Closure
    {
        // Capture the collator now, so that the closure never dereferences a
        // static property that may be null by the time it is called.
        $collator = self::$collator;

        if ($collator instanceof Collator) {
            return static function (string $x, string $y) use ($collator): int {
                // Collator::compare() can return false on failure - hence the cast.
                return (int) $collator->compare($x, $y);
            };
        }

        return static function (string $x, string $y): int {
            return strcmp(self::strtolower($x), self::strtolower($y));
        };
    }
600
601
602
603
    /**
     * Convert a string to lower case.
     *
     * When the current language is one of DOTLESS_I_LOCALES, the letters in
     * DOTLESS_I_TOLOWER are replaced first.
     *
     * @param string $string
     *
     * @return string
     */
    public static function strtolower(string $string): string
    {
        $language_code = self::$locale->language()->code();
        $has_dotless_i = in_array($language_code, self::DOTLESS_I_LOCALES, true);

        $prepared = $has_dotless_i ? strtr($string, self::DOTLESS_I_TOLOWER) : $string;

        return mb_strtolower($prepared);
    }
618
619
    /**
     * Convert a string to upper case.
     *
     * When the current language is one of DOTLESS_I_LOCALES, the letters in
     * DOTLESS_I_TOUPPER are replaced first.
     *
     * @param string $string
     *
     * @return string
     */
    public static function strtoupper(string $string): string
    {
        $language_code = self::$locale->language()->code();
        $has_dotless_i = in_array($language_code, self::DOTLESS_I_LOCALES, true);

        $prepared = $has_dotless_i ? strtr($string, self::DOTLESS_I_TOUPPER) : $string;

        return mb_strtoupper($prepared);
    }
634
635
    /**
     * The translated format string used to display the time-of-day in the current locale.
     *
     * @return string
     */
    public static function timeFormat(): string
    {
        /* I18N: This is the format string for the time-of-day. See https://php.net/date for codes */
        $format = self::$translator->translate('%H:%i:%s');

        return $format;
    }
645
646
    /**
     * Context sensitive version of translate.
     * echo I18N::translateContext('NOMINATIVE', 'January');
     * echo I18N::translateContext('GENITIVE', 'January');
     *
     * @param string $context The context that selects the translation
     * @param string $message The text to translate
     * @param string ...$args Values for the placeholders in the translated text
     *
     * @return string
     */
    public static function translateContext(string $context, string $message, ...$args): string
    {
        $translated = self::$translator->translateContext($context, $message);

        return sprintf($translated, ...$args);
    }
663
}
664