Issues (2502)

app/I18N.php (1 issue)

Labels
Severity
1
<?php
2
3
/**
4
 * webtrees: online genealogy
5
 * Copyright (C) 2025 webtrees development team
6
 * This program is free software: you can redistribute it and/or modify
7
 * it under the terms of the GNU General Public License as published by
8
 * the Free Software Foundation, either version 3 of the License, or
9
 * (at your option) any later version.
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
 * GNU General Public License for more details.
14
 * You should have received a copy of the GNU General Public License
15
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
16
 */
17
18
declare(strict_types=1);
19
20
namespace Fisharebest\Webtrees;
21
22
use Closure;
23
use Collator;
24
use Exception;
25
use Fisharebest\Localization\Locale;
26
use Fisharebest\Localization\Locale\LocaleEnUs;
27
use Fisharebest\Localization\Locale\LocaleInterface;
28
use Fisharebest\Localization\Translation;
29
use Fisharebest\Localization\Translator;
30
use Fisharebest\Webtrees\Module\ModuleCustomInterface;
31
use Fisharebest\Webtrees\Module\ModuleLanguageInterface;
32
use Fisharebest\Webtrees\Services\ModuleService;
33
34
use function array_merge;
35
use function class_exists;
36
use function html_entity_decode;
37
use function in_array;
38
use function mb_strtolower;
39
use function mb_strtoupper;
40
use function mb_substr;
41
use function ord;
42
use function sprintf;
43
use function str_contains;
44
use function str_replace;
45
use function strcmp;
46
use function strip_tags;
47
use function strlen;
48
use function strtr;
49
use function var_export;
50
51
/**
52
 * Internationalization (i18n) and localization (l10n).
53
 */
54
class I18N
55
{
56
    // MO files use these special characters to mark plurals and context.
    public const string PLURAL  = "\x00";
    public const string CONTEXT = "\x04";

    // Digits are always rendered LTR, even in RTL text.
    private const string DIGITS = '0123456789٠١٢٣٤٥٦٧٨٩۰۱۲۳۴۵۶۷۸۹';

    // Language codes that need special handling for the dotless letter I.
    private const array DOTLESS_I_LOCALES = ['az', 'tr'];

    // Replacements applied before mb_strtolower() for the languages listed above.
    private const array DOTLESS_I_TOLOWER = ['I' => 'ı', 'İ' => 'i'];

    // Replacements applied before mb_strtoupper() for the languages listed above.
    private const array DOTLESS_I_TOUPPER = ['ı' => 'I', 'i' => 'İ'];
78
79
    // The ranges of characters used by each script.
    // Each row is [script code, first code point, last code point].
    // textScript() returns the first row that matches, so the order of
    // overlapping rows (e.g. Deva 0xA8E0 inside Hans 0x3400-0xFAFF) matters.
    private const array SCRIPT_CHARACTER_RANGES = [
        ['Latn', 0x0041, 0x005A],
        ['Latn', 0x0061, 0x007A],
        ['Latn', 0x0100, 0x02AF],
        ['Grek', 0x0370, 0x03FF],
        ['Cyrl', 0x0400, 0x052F],
        ['Hebr', 0x0590, 0x05FF],
        ['Arab', 0x0600, 0x06FF],
        ['Arab', 0x0750, 0x077F],
        ['Arab', 0x08A0, 0x08FF],
        ['Deva', 0x0900, 0x097F],
        ['Taml', 0x0B80, 0x0BFF],
        ['Sinh', 0x0D80, 0x0DFF],
        ['Thai', 0x0E00, 0x0E7F],
        ['Geor', 0x10A0, 0x10FF],
        ['Grek', 0x1F00, 0x1FFF],
        ['Deva', 0xA8E0, 0xA8FF],
        ['Hans', 0x3000, 0x303F], // Mixed CJK, not just Hans
        ['Hans', 0x3400, 0xFAFF], // Mixed CJK, not just Hans
        ['Hans', 0x20000, 0x2FA1F], // Mixed CJK, not just Hans
    ];
180
181
    // Characters that are displayed in mirror form in RTL text.
    // This table is passed to strtr(), which matches keys literally, so each
    // key must be exactly the one character to be mirrored. A key with a
    // trailing space would only match when the character is followed by a
    // space, and the replacement would then swallow that space.
    private const array MIRROR_CHARACTERS = [
        '(' => ')',
        ')' => '(',
        '[' => ']',
        ']' => '[',
        '{' => '}',
        '}' => '{',
        '<' => '>',
        '>' => '<',
        '‹' => '›',
        '›' => '‹',
        '«' => '»',
        '»' => '«',
        '﴾' => '﴿',
        '﴿' => '﴾',
        '“' => '”',
        '”' => '“',
        '‘' => '’',
        '’' => '‘',
    ];
202
203
    // Punctuation used to separate list items, typically a comma
    public static string $list_separator;

    // Collator for the current locale; stays null when init() does not create one.
    private static Collator|null $collator = null;

    // Language module whose locale matches the code given to init().
    // Only assigned by init() when it is not called in setup mode.
    private static ModuleLanguageInterface $language;

    // The current locale, assigned by init().
    private static LocaleInterface $locale;

    // Translator for the current locale, assigned by init().
    private static Translator $translator;
213
214
    /**
     * The preferred locales for this site, or a default list if no preference.
     *
     * The locales are taken from the language modules reported by the module
     * service. When there are none, US English is returned as the only entry.
     *
     * @return array<LocaleInterface>
     */
    public static function activeLocales(): array
    {
        $module_service = Registry::container()->get(ModuleService::class);

        $active = $module_service
            ->findByInterface(ModuleLanguageInterface::class, false, true)
            ->map(static fn (ModuleLanguageInterface $language): LocaleInterface => $language->locale());

        return $active->isEmpty() ? [new LocaleEnUs()] : $active->all();
    }
231
232
    /**
     * The (translated) format string used to display full dates in the current locale.
     *
     * @return string
     */
    public static function dateFormat(): string
    {
        /* I18N: This is the format string for full dates. See https://php.net/date for codes */
        $format = self::$translator->translate('%j %F %Y');

        return $format;
    }
242
243
    /**
     * Convert the digits 0-9 into the local script.
     * Used for years, etc., where we do not want thousands-separators, decimals, etc.
     *
     * @param string|int $n The value to convert; integers are cast to a string first
     *
     * @return string
     */
    public static function digits(string|int $n): string
    {
        $text = (string) $n;

        return self::$locale->digits($text);
    }
255
256
    /**
     * The text direction of the current locale.
     *
     * @return string "ltr" or "rtl"
     */
    public static function direction(): string
    {
        $direction = self::$locale->direction();

        return $direction;
    }
265
266
    /**
     * Initialise the translation adapter with a locale setting.
     *
     * Sets the current locale and loads its translations, rebuilding the PHP
     * translation file from the .po file when loading fails. Outside setup
     * mode it also merges translations from custom modules and selects the
     * matching language module. Finally it creates the translator, the list
     * separator and, when the Collator class exists, a collator.
     *
     * @param string $code  Locale code, passed to Locale::create()
     * @param bool   $setup True during setup, when there is no database/modules
     *
     * @return void
     */
    public static function init(string $code, bool $setup = false): void
    {
        self::$locale = Locale::create($code);
        $language_tag = self::$locale->languageTag();

        // Load the translation file
        $php_file = __DIR__ . '/../resources/lang/' . $language_tag . '/messages.php';

        try {
            $translations = (new Translation($php_file))->asArray();
        } catch (Exception) {
            // The translations files are created during the build process, and are
            // not included in the source code.
            // Assume we are using dev code, and build (or rebuild) the files.
            $po_file      = Webtrees::ROOT_DIR . 'resources/lang/' . $language_tag . '/messages.po';
            $translations = (new Translation($po_file))->asArray();
            file_put_contents($php_file, "<?php\n\nreturn " . var_export($translations, true) . ";\n");
        }

        // Add translations from custom modules (but not during setup, as we have no database/modules)
        if (!$setup) {
            $module_service = Registry::container()->get(ModuleService::class);

            // Later modules overwrite earlier translations that share the same key.
            $merge_custom = static fn (array $carry, ModuleCustomInterface $module): array => array_merge(
                $carry,
                $module->customTranslations($language_tag),
            );

            $translations = $module_service
                ->findByInterface(ModuleCustomInterface::class)
                ->reduce($merge_custom, $translations);

            self::$language = $module_service
                ->findByInterface(ModuleLanguageInterface::class, true)
                ->first(static fn (ModuleLanguageInterface $module): bool => $module->locale()->languageTag() === $code);
        }

        // Create a translator
        self::$translator = new Translator($translations, self::$locale->pluralRule());

        /* I18N: This punctuation is used to separate lists of items */
        self::$list_separator = self::translate(', ');

        // Create a collator
        try {
            // Symfony provides a very incomplete polyfill - which cannot be used.
            if (class_exists('Collator')) {
                $locale_code = self::$locale->code();

                // Need phonebook collation rules for German Ä, Ö and Ü.
                // A code that already contains "@" gets the keyword appended with ";".
                $delimiter = str_contains($locale_code, '@') ? ';' : '@';

                self::$collator = new Collator($locale_code . $delimiter . 'collation=phonebook');

                // Ignore upper/lower case differences
                self::$collator->setStrength(Collator::SECONDARY);
            }
        } catch (Exception) {
            // PHP-INTL is not installed?  We'll use a fallback later.
        }
    }
330
331
    /**
     * Translate a string, and then substitute placeholders using sprintf().
     * echo I18N::translate('Hello World!');
     * echo I18N::translate('The %s sat on the mat', 'cat');
     *
     * @param string $message
     * @param string ...$args
     *
     * @return string
     */
    public static function translate(string $message, ...$args): string
    {
        return sprintf(self::$translator->translate($message), ...$args);
    }
347
348
    /**
     * The language tag of the current locale.
     *
     * @return string
     */
    public static function languageTag(): string
    {
        $language_tag = self::$locale->languageTag();

        return $language_tag;
    }
355
356
    /**
     * The current locale, as set by init().
     *
     * @return LocaleInterface
     */
    public static function locale(): LocaleInterface
    {
        $current = self::$locale;

        return $current;
    }
360
361
    /**
     * The language module selected by init().
     *
     * @return ModuleLanguageInterface
     */
    public static function language(): ModuleLanguageInterface
    {
        $current = self::$language;

        return $current;
    }
368
369
    /**
     * Translate a number into the local representation.
     * The number is rounded to $precision decimal places first.
     * e.g. 12345.67 becomes
     * en: 12,345.67
     * fr: 12 345,67
     * de: 12.345,67
     *
     * @param float $n
     * @param int   $precision Number of decimal places to keep
     *
     * @return string
     */
    public static function number(float $n, int $precision = 0): string
    {
        $rounded = round($n, $precision);

        return self::$locale->number($rounded);
    }
385
386
    /**
     * Translate a fraction into a percentage.
     * e.g. 0.123 becomes
     * en: 12.3%
     * fr: 12,3 %
     * de: 12,3%
     *
     * @param float $n         The fraction
     * @param int   $precision Decimal places of the percentage
     *
     * @return string
     */
    public static function percentage(float $n, int $precision = 0): string
    {
        // The input is a fraction, so it is rounded to two more places than $precision.
        $rounded = round($n, $precision + 2);

        return self::$locale->percent($rounded);
    }
402
403
    /**
     * Translate a plural string, and then substitute placeholders using sprintf().
     * echo self::plural('There is an error', 'There are errors', $num_errors);
     * echo self::plural('There is one error', 'There are %s errors', $num_errors);
     * echo self::plural('There is %1$s %2$s cat', 'There are %1$s %2$s cats', $num, $num, $colour);
     *
     * @param string $singular
     * @param string $plural
     * @param int    $count    Selects the plural form; it is not passed to sprintf()
     * @param string ...$args
     *
     * @return string
     */
    public static function plural(string $singular, string $plural, int $count, ...$args): string
    {
        return sprintf(self::$translator->translatePlural($singular, $plural, $count), ...$args);
    }
422
423
    /**
     * UTF8 version of PHP::strrev()
     * Reverse RTL text for third-party libraries such as GD2 and googlechart.
     * These do not support UTF8 text direction, so we must mimic it for them.
     * Numbers are always rendered LTR, even in RTL text.
     * The visual direction of characters such as parentheses should be reversed.
     *
     * HTML tags are stripped and HTML entities are decoded first. Text whose
     * script is LTR is returned in that cleaned-up form, without reversing.
     *
     * @param string $text Text to be reversed
     *
     * @return string
     */
    public static function reverseText(string $text): string
    {
        // Remove HTML markup (we can't display it and it is LTR), then decode HTML entities.
        $plain = html_entity_decode(strip_tags($text), ENT_QUOTES, 'UTF-8');

        // LTR text doesn't need reversing
        if (self::scriptDirection(self::textScript($plain)) === 'ltr') {
            return $plain;
        }

        // Mirrored characters
        $remaining = strtr($plain, self::MIRROR_CHARACTERS);

        $output = '';
        $number = '';

        while ($remaining !== '') {
            $char      = mb_substr($remaining, 0, 1);
            $remaining = mb_substr($remaining, 1);

            // Collect a run of digits in its original order.
            if (str_contains(self::DIGITS, $char)) {
                $number .= $char;
                continue;
            }

            // Prepend the character, with any pending digits just after it.
            $output = $char . $number . $output;
            $number = '';
        }

        return $number . $output;
    }
464
465
    /**
     * Return the direction (ltr or rtl) for a given script
     * The PHP/intl library does not provide this information, so we need
     * our own lookup table.
     *
     * @param string $script Script code, e.g. "Arab" or "Latn"
     *
     * @return string "rtl" for Arab, Hebr, Mong and Thaa; "ltr" for anything else
     */
    public static function scriptDirection(string $script): string
    {
        return match ($script) {
            'Arab', 'Hebr', 'Mong', 'Thaa' => 'rtl',
            default => 'ltr',
        };
    }
486
487
    /**
     * Identify the script used for a piece of text
     *
     * HTML tags, HTML entities and the "unknown name" placeholders are removed,
     * then the text is scanned as UTF-8. The script of the first character that
     * falls inside one of SCRIPT_CHARACTER_RANGES is returned. Characters outside
     * every range (punctuation, spacing, etc.) are skipped.
     *
     * @param string $string
     *
     * @return string A script code such as "Latn" or "Hebr". "Latn" is also returned
     *                when no character is recognised or the UTF-8 is invalid or truncated.
     */
    public static function textScript(string $string): string
    {
        $string = strip_tags($string); // otherwise HTML tags show up as latin
        $string = html_entity_decode($string, ENT_QUOTES, 'UTF-8'); // otherwise HTML entities show up as latin
        $string = str_replace([
            Individual::NOMEN_NESCIO,
            Individual::PRAENOMEN_NESCIO,
        ], '', $string);
        $pos    = 0;
        $strlen = strlen($string);

        while ($pos < $strlen) {
            // The lead byte tells us how many bytes the character occupies.
            $byte1 = ord($string[$pos]);

            if ($byte1 < 0x80) {
                $chrlen = 1;
            } elseif ($byte1 < 0xC0) {
                // Invalid continuation character
                return 'Latn';
            } elseif ($byte1 < 0xE0) {
                $chrlen = 2;
            } elseif ($byte1 < 0xF0) {
                $chrlen = 3;
            } elseif ($byte1 < 0xF8) {
                // Four bytes. Skipping only three would leave $pos on a continuation byte.
                $chrlen = 4;
            } else {
                // Invalid UTF
                return 'Latn';
            }

            if ($pos + $chrlen > $strlen) {
                // Truncated sequence: treat it like any other invalid UTF-8.
                return 'Latn';
            }

            // Get the Unicode Code Point for the character at position $pos
            $code_point = match ($chrlen) {
                1 => $byte1,
                2 => (($byte1 & 0x1F) << 6)
                    | (ord($string[$pos + 1]) & 0x3F),
                3 => (($byte1 & 0x0F) << 12)
                    | ((ord($string[$pos + 1]) & 0x3F) << 6)
                    | (ord($string[$pos + 2]) & 0x3F),
                // The lead byte of a 4-byte sequence holds bits 18-20 of the code point.
                4 => (($byte1 & 0x07) << 18)
                    | ((ord($string[$pos + 1]) & 0x3F) << 12)
                    | ((ord($string[$pos + 2]) & 0x3F) << 6)
                    | (ord($string[$pos + 3]) & 0x3F),
            };

            foreach (self::SCRIPT_CHARACTER_RANGES as $range) {
                if ($code_point >= $range[1] && $code_point <= $range[2]) {
                    return $range[0];
                }
            }

            // Not a recognised script. Maybe punctuation, spacing, etc. Keep looking.
            $pos += $chrlen;
        }

        return 'Latn';
    }
538
539
    /**
     * A closure which will compare strings using local collation rules.
     *
     * Uses the collator when one was created. Otherwise it falls back to a
     * byte-wise comparison of the lower-cased strings.
     *
     * @return Closure(string,string):int
     */
    public static function comparator(): Closure
    {
        $collator = self::$collator;

        if ($collator === null) {
            return static fn (string $x, string $y): int => strcmp(self::strtolower($x), self::strtolower($y));
        }

        return static fn (string $x, string $y): int => (int) $collator->compare($x, $y);
    }
554
555
    /**
     * Convert a string to lower case.
     *
     * For languages with a dotless letter I, those letters are mapped
     * explicitly before the generic multibyte conversion.
     *
     * @param string $string
     *
     * @return string
     */
    public static function strtolower(string $string): string
    {
        $language_code = self::$locale->language()->code();

        if (!in_array($language_code, self::DOTLESS_I_LOCALES, true)) {
            return mb_strtolower($string);
        }

        return mb_strtolower(strtr($string, self::DOTLESS_I_TOLOWER));
    }
570
571
    /**
     * Convert a string to upper case.
     *
     * For languages with a dotless letter I, those letters are mapped
     * explicitly before the generic multibyte conversion.
     *
     * @param string $string
     *
     * @return string
     */
    public static function strtoupper(string $string): string
    {
        $language_code = self::$locale->language()->code();

        if (!in_array($language_code, self::DOTLESS_I_LOCALES, true)) {
            return mb_strtoupper($string);
        }

        return mb_strtoupper(strtr($string, self::DOTLESS_I_TOUPPER));
    }
586
587
    /**
     * The (translated) format string used to display the time-of-day in the current locale.
     *
     * @return string
     */
    public static function timeFormat(): string
    {
        /* I18N: This is the format string for the time-of-day. See https://php.net/date for codes */
        $format = self::$translator->translate('%H:%i:%s');

        return $format;
    }
597
598
    /**
     * Context sensitive version of translate.
     * The translated message then has its placeholders substituted using sprintf().
     * echo I18N::translateContext('NOMINATIVE', 'January');
     * echo I18N::translateContext('GENITIVE', 'January');
     *
     * @param string $context
     * @param string $message
     * @param string ...$args
     *
     * @return string
     */
    public static function translateContext(string $context, string $message, ...$args): string
    {
        return sprintf(self::$translator->translateContext($context, $message), ...$args);
    }
615
}
616