1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
/**
 * webtrees: online genealogy
 * Copyright (C) 2019 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
17
|
|
|
|
18
|
|
|
declare(strict_types=1); |
19
|
|
|
|
20
|
|
|
namespace Fisharebest\Webtrees\Services; |
21
|
|
|
|
22
|
|
|
use Fisharebest\ExtCalendar\ArabicCalendar; |
23
|
|
|
use Fisharebest\ExtCalendar\CalendarInterface; |
24
|
|
|
use Fisharebest\ExtCalendar\GregorianCalendar; |
25
|
|
|
use Fisharebest\ExtCalendar\JewishCalendar; |
26
|
|
|
use Fisharebest\ExtCalendar\PersianCalendar; |
27
|
|
|
use Fisharebest\Localization\Locale\LocaleInterface; |
28
|
|
|
use Fisharebest\Webtrees\I18N; |
29
|
|
|
|
30
|
|
|
use function preg_replace; |
31
|
|
|
use function strlen; |
32
|
|
|
use function substr_compare; |
33
|
|
|
|
34
|
|
|
/**
 * Utilities to support localization.
 *
 * Provides per-locale lookups (alphabet, calendar, MySQL collation),
 * extraction of the initial letter of a name (honouring digraphs), and
 * classification of a date format string as DMY, MDY or YMD.
 */
class LocalizationService
{
    // Alphabets used by various scripts and locales.
    private const ARABIC_ALPHABET = ['ا', 'ب', 'ت', 'ث', 'ج', 'ح', 'خ', 'د', 'ذ', 'ر', 'ز', 'س', 'ش', 'ص', 'ض', 'ط', 'ظ', 'ع', 'غ', 'ف', 'ق', 'ك', 'ل', 'م', 'ن', 'ه', 'و', 'ي', 'آ', 'ة', 'ى', 'ی'];
    private const CZECH_ALPHABET = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'CH', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'];
    private const CYRILLIC_ALPHABET = ['А', 'Б', 'В', 'Г', 'Д', 'Е', 'Ё', 'Ж', 'З', 'И', 'Й', 'К', 'Л', 'М', 'Н', 'О', 'П', 'Р', 'С', 'Т', 'У', 'Ф', 'Х', 'Ц', 'Ч', 'Ш', 'Щ', 'Ъ', 'Ы', 'Ь', 'Э', 'Ю', 'Я'];
    private const DUTCH_ALPHABET = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'IJ'];
    private const ESTONIAN_ALPHABET = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'Š', 'Z', 'Ž', 'T', 'U', 'V', 'W', 'Õ', 'Ä', 'Ö', 'Ü', 'X', 'Y'];
    private const GREEK_ALPHABET = ['Α', 'Β', 'Γ', 'Δ', 'Ε', 'Ζ', 'Η', 'Θ', 'Ι', 'Κ', 'Λ', 'Μ', 'Ν', 'Ξ', 'Ο', 'Π', 'Ρ', 'Σ', 'Τ', 'Υ', 'Φ', 'Χ', 'Ψ', 'Ω'];
    private const HEBREW_ALPHABET = ['א', 'ב', 'ג', 'ד', 'ה', 'ו', 'ז', 'ח', 'ט', 'י', 'כ', 'ל', 'מ', 'נ', 'ס', 'ע', 'פ', 'צ', 'ק', 'ר', 'ש', 'ת'];
    private const LATIN_ALPHABET = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'];
    private const LITHUANIAN_ALPHABET = ['A', 'Ą', 'B', 'C', 'Č', 'D', 'E', 'Ę', 'Ė', 'F', 'G', 'H', 'I', 'Y', 'Į', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'R', 'S', 'Š', 'T', 'U', 'Ų', 'Ū', 'V', 'Z', 'Ž'];
    private const HUNGARIAN_ALPHABET = ['A', 'B', 'C', 'CS', 'D', 'DZ', 'DZS', 'E', 'F', 'G', 'GY', 'H', 'I', 'J', 'K', 'L', 'LY', 'M', 'N', 'NY', 'O', 'Ö', 'P', 'Q', 'R', 'S', 'SZ', 'T', 'TY', 'U', 'Ü', 'V', 'W', 'X', 'Y', 'Z', 'ZS'];
    private const NORWEGIAN_ALPHABET = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'Æ', 'Ø', 'Å'];
    private const POLISH_ALPHABET = ['A', 'B', 'C', 'Ć', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'Ł', 'M', 'N', 'O', 'Ó', 'P', 'Q', 'R', 'S', 'Ś', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'Ź', 'Ż'];
    private const ROMANIAN_ALPHABET = ['A', 'Ă', 'Â', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'Î', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'Ş', 'T', 'Ţ', 'U', 'V', 'W', 'X', 'Y', 'Z'];
    private const SERBIAN_ALPHABET = ['A', 'B', 'C', 'Č', 'Ć', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'Š', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'Ž'];
    private const SLOVAKIAN_ALPHABET = ['A', 'Á', 'Ä', 'B', 'C', 'Č', 'D', 'Ď', 'E', 'É', 'F', 'G', 'H', 'I', 'Í', 'J', 'K', 'L', 'Ľ', 'Ĺ', 'M', 'N', 'Ň', 'O', 'Ó', 'Ô', 'P', 'Q', 'R', 'Ŕ', 'S', 'Š', 'T', 'Ť', 'U', 'Ú', 'V', 'W', 'X', 'Y', 'Ý', 'Z', 'Ž'];
    private const SLOVENIAN_ALPHABET = ['A', 'B', 'C', 'Č', 'Ć', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'Š', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'Ž'];
    private const SPANISH_ALPHABET = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'Ñ', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'];
    private const SWEDISH_ALPHABET = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'Å', 'Ä', 'Ö'];
    private const TURKISH_ALPHABET = ['A', 'B', 'C', 'Ç', 'D', 'E', 'F', 'G', 'Ğ', 'H', 'I', 'İ', 'J', 'K', 'L', 'M', 'N', 'O', 'Ö', 'P', 'R', 'S', 'Ş', 'T', 'U', 'Ü', 'V', 'Y', 'Z'];

    // Scripts with a default alphabet.
    private const ALPHABETS_FOR_SCRIPT = [
        'Latn' => self::LATIN_ALPHABET,
        'Cyrl' => self::CYRILLIC_ALPHABET,
        'Grek' => self::GREEK_ALPHABET,
        'Hebr' => self::HEBREW_ALPHABET,
    ];

    // Locales that use a non-default alphabet.
    private const ALPHABETS = [
        'ar'      => self::ARABIC_ALPHABET,
        'cs'      => self::CZECH_ALPHABET,
        'da'      => self::NORWEGIAN_ALPHABET,
        'es'      => self::SPANISH_ALPHABET,
        'et'      => self::ESTONIAN_ALPHABET,
        'fi'      => self::SWEDISH_ALPHABET,
        'hu'      => self::HUNGARIAN_ALPHABET,
        'lt'      => self::LITHUANIAN_ALPHABET,
        'nb'      => self::NORWEGIAN_ALPHABET,
        'nl'      => self::DUTCH_ALPHABET,
        'nn'      => self::NORWEGIAN_ALPHABET,
        'pl'      => self::POLISH_ALPHABET,
        'ro'      => self::ROMANIAN_ALPHABET,
        'sk'      => self::SLOVAKIAN_ALPHABET,
        'sl'      => self::SLOVENIAN_ALPHABET,
        'sr-Latn' => self::SERBIAN_ALPHABET,
        'tr'      => self::TURKISH_ALPHABET,
        'sv'      => self::SWEDISH_ALPHABET,
    ];

    // Some language collate names using digraphs (or trigraphs).
    // Each entry maps an upper-case prefix to the initial that represents it.
    // Entries are tested in order, so a longer prefix must precede any
    // shorter prefix that it starts with (e.g. 'DZS' before 'DZ').
    private const DIGRAPHS = [
        'cs' => ['CH' => 'CH'],
        'da' => ['AA' => 'Å'],
        'nb' => ['AA' => 'Å'],
        'hu' => ['CS' => 'CS', 'DZS' => 'DZS', 'DZ' => 'DZ', 'GY' => 'GY', 'LY' => 'LY', 'NY' => 'NY', 'SZ' => 'SZ', 'TY' => 'TY', 'ZS' => 'ZS'],
        'nl' => ['IJ' => 'IJ'],
        'nn' => ['AA' => 'Å'],
    ];

    /**
     * Which alphabet is used in a locale?
     *
     * The lookup tries, in order: an alphabet registered for the locale's
     * language tag, the default alphabet for the locale's script, and
     * finally the Latin alphabet.
     *
     * @param LocaleInterface $locale
     *
     * @return string[] The letters (or digraphs/trigraphs) of the alphabet.
     */
    public function alphabet(LocaleInterface $locale): array
    {
        $language = $locale->languageTag();
        $script   = $locale->script()->code();

        return self::ALPHABETS[$language] ?? self::ALPHABETS_FOR_SCRIPT[$script] ?? self::LATIN_ALPHABET;
    }

    /**
     * Which calendar is used in a locale?
     *
     * Only the calendar that is returned gets instantiated.
     *
     * @param LocaleInterface $locale
     *
     * @return CalendarInterface A Gregorian calendar unless the language tag
     *                           is one of the non-Gregorian cases below.
     */
    public function calendar(LocaleInterface $locale): CalendarInterface
    {
        switch ($locale->languageTag()) {
            case 'ar':
                return new ArabicCalendar();
            case 'fa':
                return new PersianCalendar();
            case 'he':
            case 'yi':
                return new JewishCalendar();
            default:
                return new GregorianCalendar();
        }
    }

    /**
     * Which MySQL collation should be used for this locale?
     *
     * @param LocaleInterface $locale
     *
     * @return string The locale's collation name, prefixed with "utf8_".
     */
    public function collation(LocaleInterface $locale): string
    {
        $collation = $locale->collation();

        switch ($collation) {
            case 'croatian_ci':
            case 'german2_ci':
            case 'vietnamese_ci':
                // Only available in MySQL 5.6 - use the generic Unicode collation instead.
                return 'utf8_unicode_ci';
            default:
                return 'utf8_' . $collation;
        }
    }

    /**
     * Extract the initial letter (or digraph or trigraph) from a name.
     *
     * The text is converted to upper case first, so the result is always
     * upper case. An empty text gives an empty string.
     *
     * @param string          $text
     * @param LocaleInterface $locale
     *
     * @return string
     */
    public function initialLetter(string $text, LocaleInterface $locale): string
    {
        $text = I18N::strtoupper($text);

        $digraphs = self::DIGRAPHS[$locale->languageTag()] ?? [];

        foreach ($digraphs as $key => $value) {
            // Byte-wise prefix comparison: does the text start with this digraph?
            if (substr_compare($text, $key, 0, strlen($key)) === 0) {
                return $value;
            }
        }

        // No special rules - just take the first character
        return mb_substr($text, 0, 1);
    }

    /**
     * Convert a PHP date format string into DMY, MDY or YMD
     *
     * Backslash-escaped characters are literal text in a PHP date format
     * (e.g. the "\d\e" in "j \d\e F \d\e Y"), so they are removed before
     * looking at the order of the day, month and year specifiers.
     *
     * @param string $format
     *
     * @return string One of "YMD", "MDY" or "DMY". Anything that is not
     *                recognised as YMD or MDY is reported as "DMY".
     */
    public function dateFormatToOrder(string $format): string
    {
        // Strip escaped literals, so that they are not mistaken for specifiers.
        // If the regex engine fails (null), fall back to the unmodified format.
        $specifiers = preg_replace('/\\\\./s', '', $format) ?? $format;

        if (preg_match('/[yY].*[mnFM].*[dj]/', $specifiers)) {
            return 'YMD';
        }

        if (preg_match('/[mnFM].*[dj].*[yY]/', $specifiers)) {
            return 'MDY';
        }

        return 'DMY';
    }
}
200
|
|
|
|