|
1
|
|
|
<?php |
|
2
|
|
|
|
|
3
|
|
|
/** |
|
4
|
|
|
* webtrees: online genealogy |
|
5
|
|
|
* Copyright (C) 2019 webtrees development team |
|
6
|
|
|
* This program is free software: you can redistribute it and/or modify |
|
7
|
|
|
* it under the terms of the GNU General Public License as published by |
|
8
|
|
|
* the Free Software Foundation, either version 3 of the License, or |
|
9
|
|
|
* (at your option) any later version. |
|
10
|
|
|
* This program is distributed in the hope that it will be useful, |
|
11
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
12
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
13
|
|
|
* GNU General Public License for more details. |
|
14
|
|
|
* You should have received a copy of the GNU General Public License |
|
15
|
|
|
* along with this program. If not, see <http://www.gnu.org/licenses/>. |
|
16
|
|
|
*/ |
|
17
|
|
|
|
|
18
|
|
|
declare(strict_types=1); |
|
19
|
|
|
|
|
20
|
|
|
namespace Fisharebest\Webtrees\Services; |
|
21
|
|
|
|
|
22
|
|
|
use Fisharebest\ExtCalendar\ArabicCalendar; |
|
23
|
|
|
use Fisharebest\ExtCalendar\CalendarInterface; |
|
24
|
|
|
use Fisharebest\ExtCalendar\GregorianCalendar; |
|
25
|
|
|
use Fisharebest\ExtCalendar\JewishCalendar; |
|
26
|
|
|
use Fisharebest\ExtCalendar\PersianCalendar; |
|
27
|
|
|
use Fisharebest\Localization\Locale\LocaleInterface; |
|
28
|
|
|
use Fisharebest\Webtrees\I18N; |
|
29
|
|
|
|
|
30
|
|
|
use function preg_replace; |
|
31
|
|
|
use function strlen; |
|
32
|
|
|
use function substr_compare; |
|
33
|
|
|
|
|
34
|
|
|
/**
 * Utilities to support localization.
 *
 * Provides per-locale lookups (alphabet, calendar, MySQL collation), extraction
 * of the initial letter of a name, and classification of a date format string
 * by the order of its day/month/year components.
 */
class LocalizationService
{
    // Alphabets used by various scripts and locales.
    // Each constant is a list of upper-case letters; some entries are digraphs or trigraphs (e.g. 'CH', 'DZS').
    private const ARABIC_ALPHABET     = ['ا', 'ب', 'ت', 'ث', 'ج', 'ح', 'خ', 'د', 'ذ', 'ر', 'ز', 'س', 'ش', 'ص', 'ض', 'ط', 'ظ', 'ع', 'غ', 'ف', 'ق', 'ك', 'ل', 'م', 'ن', 'ه', 'و', 'ي', 'آ', 'ة', 'ى', 'ی'];
    private const CZECH_ALPHABET      = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'CH', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'];
    private const CYRILLIC_ALPHABET   = ['А', 'Б', 'В', 'Г', 'Д', 'Е', 'Ё', 'Ж', 'З', 'И', 'Й', 'К', 'Л', 'М', 'Н', 'О', 'П', 'Р', 'С', 'Т', 'У', 'Ф', 'Х', 'Ц', 'Ч', 'Ш', 'Щ', 'Ъ', 'Ы', 'Ь', 'Э', 'Ю', 'Я'];
    private const DUTCH_ALPHABET      = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'IJ'];
    private const ESTONIAN_ALPHABET   = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'Š', 'Z', 'Ž', 'T', 'U', 'V', 'W', 'Õ', 'Ä', 'Ö', 'Ü', 'X', 'Y'];
    private const GREEK_ALPHABET      = ['Α', 'Β', 'Γ', 'Δ', 'Ε', 'Ζ', 'Η', 'Θ', 'Ι', 'Κ', 'Λ', 'Μ', 'Ν', 'Ξ', 'Ο', 'Π', 'Ρ', 'Σ', 'Τ', 'Υ', 'Φ', 'Χ', 'Ψ', 'Ω'];
    private const HEBREW_ALPHABET     = ['א', 'ב', 'ג', 'ד', 'ה', 'ו', 'ז', 'ח', 'ט', 'י', 'כ', 'ל', 'מ', 'נ', 'ס', 'ע', 'פ', 'צ', 'ק', 'ר', 'ש', 'ת'];
    private const LATIN_ALPHABET      = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'];
    private const LITHUANIAN_ALPHABET = ['A', 'Ą', 'B', 'C', 'Č', 'D', 'E', 'Ę', 'Ė', 'F', 'G', 'H', 'I', 'Y', 'Į', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'R', 'S', 'Š', 'T', 'U', 'Ų', 'Ū', 'V', 'Z', 'Ž'];
    private const HUNGARIAN_ALPHABET  = ['A', 'B', 'C', 'CS', 'D', 'DZ', 'DZS', 'E', 'F', 'G', 'GY', 'H', 'I', 'J', 'K', 'L', 'LY', 'M', 'N', 'NY', 'O', 'Ö', 'P', 'Q', 'R', 'S', 'SZ', 'T', 'TY', 'U', 'Ü', 'V', 'W', 'X', 'Y', 'Z', 'ZS'];
    private const NORWEGIAN_ALPHABET  = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'Æ', 'Ø', 'Å'];
    private const POLISH_ALPHABET     = ['A', 'B', 'C', 'Ć', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'Ł', 'M', 'N', 'O', 'Ó', 'P', 'Q', 'R', 'S', 'Ś', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'Ź', 'Ż'];
    private const ROMANIAN_ALPHABET   = ['A', 'Ă', 'Â', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'Î', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'Ş', 'T', 'Ţ', 'U', 'V', 'W', 'X', 'Y', 'Z'];
    private const SERBIAN_ALPHABET    = ['A', 'B', 'C', 'Č', 'Ć', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'Š', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'Ž'];
    private const SLOVAKIAN_ALPHABET  = ['A', 'Á', 'Ä', 'B', 'C', 'Č', 'D', 'Ď', 'E', 'É', 'F', 'G', 'H', 'I', 'Í', 'J', 'K', 'L', 'Ľ', 'Ĺ', 'M', 'N', 'Ň', 'O', 'Ó', 'Ô', 'P', 'Q', 'R', 'Ŕ', 'S', 'Š', 'T', 'Ť', 'U', 'Ú', 'V', 'W', 'X', 'Y', 'Ý', 'Z', 'Ž'];
    private const SLOVENIAN_ALPHABET  = ['A', 'B', 'C', 'Č', 'Ć', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'Š', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'Ž'];
    private const SPANISH_ALPHABET    = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'Ñ', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'];
    private const SWEDISH_ALPHABET    = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'Å', 'Ä', 'Ö'];
    private const TURKISH_ALPHABET    = ['A', 'B', 'C', 'Ç', 'D', 'E', 'F', 'G', 'Ğ', 'H', 'I', 'İ', 'J', 'K', 'L', 'M', 'N', 'O', 'Ö', 'P', 'R', 'S', 'Ş', 'T', 'U', 'Ü', 'V', 'Y', 'Z'];

    // Scripts with a default alphabet, keyed by script code.
    private const ALPHABETS_FOR_SCRIPT = [
        'Latn' => self::LATIN_ALPHABET,
        'Cyrl' => self::CYRILLIC_ALPHABET,
        'Grek' => self::GREEK_ALPHABET,
        'Hebr' => self::HEBREW_ALPHABET,
    ];

    // Locales that use a non-default alphabet, keyed by language tag.
    // These take precedence over the script defaults above.
    private const ALPHABETS = [
        'ar'      => self::ARABIC_ALPHABET,
        'cs'      => self::CZECH_ALPHABET,
        'da'      => self::NORWEGIAN_ALPHABET,
        'es'      => self::SPANISH_ALPHABET,
        'et'      => self::ESTONIAN_ALPHABET,
        'fi'      => self::SWEDISH_ALPHABET,
        'hu'      => self::HUNGARIAN_ALPHABET,
        'lt'      => self::LITHUANIAN_ALPHABET,
        'nb'      => self::NORWEGIAN_ALPHABET,
        'nl'      => self::DUTCH_ALPHABET,
        'nn'      => self::NORWEGIAN_ALPHABET,
        'pl'      => self::POLISH_ALPHABET,
        'ro'      => self::ROMANIAN_ALPHABET,
        'sk'      => self::SLOVAKIAN_ALPHABET,
        'sl'      => self::SLOVENIAN_ALPHABET,
        'sr-Latn' => self::SERBIAN_ALPHABET,
        'tr'      => self::TURKISH_ALPHABET,
        'sv'      => self::SWEDISH_ALPHABET,
    ];

    // Some language collate names using digraphs (or trigraphs).
    // Maps language tag => [upper-case prefix => initial letter to report].
    // Entries are tested in order, so a longer prefix must precede any shorter
    // prefix it starts with (Hungarian 'DZS' is listed before 'DZ').
    private const DIGRAPHS = [
        'cs' => ['CH' => 'CH'],
        'da' => ['AA' => 'Å'],
        'nb' => ['AA' => 'Å'],
        'hu' => ['CS' => 'CS', 'DZS' => 'DZS', 'DZ' => 'DZ', 'GY' => 'GY', 'LY' => 'LY', 'NY' => 'NY', 'SZ' => 'SZ', 'TY' => 'TY', 'ZS' => 'ZS'],
        'nl' => ['IJ' => 'IJ'],
        'nn' => ['AA' => 'Å'],
    ];

    /**
     * Which alphabet is used in a locale?
     *
     * The locale's language tag is tried first, then its script code; if
     * neither has an entry, the Latin alphabet is returned.
     *
     * @param LocaleInterface $locale
     *
     * @return array<int,string>
     */
    public function alphabet(LocaleInterface $locale): array
    {
        $language = $locale->languageTag();
        $script   = $locale->script()->code();

        return self::ALPHABETS[$language] ?? self::ALPHABETS_FOR_SCRIPT[$script] ?? self::LATIN_ALPHABET;
    }

    /**
     * Which calendar is used in a locale?
     *
     * Only the calendar that is actually returned gets instantiated.
     *
     * @param LocaleInterface $locale
     *
     * @return CalendarInterface
     */
    public function calendar(LocaleInterface $locale): CalendarInterface
    {
        switch ($locale->languageTag()) {
            case 'ar':
                return new ArabicCalendar();

            case 'fa':
                return new PersianCalendar();

            case 'he':
            case 'yi':
                return new JewishCalendar();

            default:
                return new GregorianCalendar();
        }
    }

    /**
     * Which MySQL collation should be used for this locale?
     *
     * @param LocaleInterface $locale
     *
     * @return string The locale's collation name prefixed with "utf8_", or
     *                "utf8_unicode_ci" for the collations listed below.
     */
    public function collation(LocaleInterface $locale): string
    {
        $collation = $locale->collation();

        switch ($collation) {
            case 'croatian_ci':
            case 'german2_ci':
            case 'vietnamese_ci':
                // Only available in MySQL 5.6 - use the generic unicode collation instead.
                return 'utf8_unicode_ci';

            default:
                return 'utf8_' . $collation;
        }
    }

    /**
     * Extract the initial letter (or digraph or trigraph) from a name.
     *
     * @param string          $text
     * @param LocaleInterface $locale
     *
     * @return string The upper-cased initial; an empty string when $text is empty.
     */
    public function initialLetter(string $text, LocaleInterface $locale): string
    {
        // The digraph prefixes are upper-case, so compare against upper-cased text.
        $text     = I18N::strtoupper($text);
        $digraphs = self::DIGRAPHS[$locale->languageTag()] ?? [];

        foreach ($digraphs as $prefix => $letter) {
            if (substr_compare($text, $prefix, 0, strlen($prefix)) === 0) {
                return $letter;
            }
        }

        // No special rules - just take the first character
        return mb_substr($text, 0, 1);
    }

    /**
     * Convert a PHP date format string into DMY, MDY or YMD
     *
     * The result is decided by the relative order of the year (y, Y), month
     * (m, n, F, M) and day (d, j) format codes. Characters escaped with a
     * backslash are literal text, not format codes, and are ignored.
     *
     * @param string $format
     *
     * @return string One of 'YMD', 'MDY' or 'DMY' (the default).
     */
    public function dateFormatToOrder(string $format): string
    {
        // Drop backslash-escaped characters (e.g. the "\d\e" in "j \d\e F \d\e Y"),
        // otherwise an escaped literal such as "\d" would be mistaken for a day code.
        // preg_replace() returns null on failure; fall back to the raw format then.
        $codes = preg_replace('/\\\\./s', '', $format) ?? $format;

        if (preg_match('/[yY].*[mnFM].*[dj]/', $codes) === 1) {
            return 'YMD';
        }

        if (preg_match('/[mnFM].*[dj].*[yY]/', $codes) === 1) {
            return 'MDY';
        }

        return 'DMY';
    }
}
|
200
|
|
|
|