1 | <?php namespace Tamtamchik\NameCase; |
||||
2 | |||||
/**
 * Class Formatter.
 *
 * Re-cases personal names ("KEITH MACDONALD" -> "Keith MacDonald"), applying
 * optional rules for Irish prefixes, Spanish/Hebrew particles, roman numerals
 * and post-nominal letters.
 *
 * Options and post-nominal exclusions are stored in static properties, so a
 * value set through any entry point stays in effect for subsequent calls.
 */
class Formatter
{
    // Irish exceptions: "Mac" names whose following letter must stay lowercase
    // (applied after updateMac() has upper-cased it), plus MacIsaac.
    private const EXCEPTIONS = [
        '\bMacEdo'     => 'Macedo',
        '\bMacEvicius' => 'Macevicius',
        '\bMacHado'    => 'Machado',
        '\bMacHar'     => 'Machar',
        '\bMacHin'     => 'Machin',
        '\bMacHlin'    => 'Machlin',
        '\bMacIas'     => 'Macias',
        '\bMacIulis'   => 'Maciulis',
        '\bMacKie'     => 'Mackie',
        '\bMacKle'     => 'Mackle',
        '\bMacKlin'    => 'Macklin',
        '\bMacKmin'    => 'Mackmin',
        '\bMacQuarie'  => 'Macquarie',
        '\bMacOmber'   => 'Macomber',
        '\bMacIn'      => 'Macin',
        '\bMacKintosh' => 'Mackintosh',
        '\bMacKen'     => 'Macken',
        '\bMacHen'     => 'Machen',
        '\bMacisaac'   => 'MacIsaac',
        '\bMacHiel'    => 'Machiel',
        '\bMacIol'     => 'Maciol',
        '\bMacKell'    => 'Mackell',
        '\bMacKlem'    => 'Macklem',
        '\bMacKrell'   => 'Mackrell',
        '\bMacLin'     => 'Maclin',
        '\bMacKey'     => 'Mackey',
        '\bMacKley'    => 'Mackley',
        '\bMacHell'    => 'Machell',
        '\bMacHon'     => 'Machon',
    ];

    // General replacements (pattern => replacement), always applied.
    private const REPLACEMENTS = [
        '\bAl(?=\s+\w)'         => 'al',      // al Arabic or forename Al.
        '\bAp\b'                => 'ap',      // ap Welsh.
        '\b(Bin|Binti|Binte)\b' => 'bin',     // bin, binti, binte Arabic.
        '\bDell([ae])\b'        => 'dell\1',  // della and delle Italian.
        '\bD([aeiou])\b'        => 'd\1',     // da, de, di Italian; du French; do Brasil.
        '\bD([ao]s)\b'          => 'd\1',     // das, dos Brasileiros.
        '\bDe([lrn])\b'         => 'de\1',    // del Italian; der/den Dutch/Flemish.
        '\bL([eo])\b'           => 'l\1',     // lo Italian; le French.
        '\bTe([rn])\b'          => 'te\1',    // ten, ter Dutch/Flemish.
        '\bVan(?=\s+\w)'        => 'van',     // van Dutch/Flemish or forename Van.
        '\bVon\b'               => 'von',     // von German.
    ];

    // Applied only when the 'spanish' option is OFF (see getReplacements()).
    private const SPANISH = [
        '\bEl\b' => 'el', // el Greek or El Spanish.
        '\bLa\b' => 'la', // la French or La Spanish.
    ];

    // Applied only when the 'hebrew' option is on.
    private const HEBREW = [
        '\bBen(?=\s+\w)' => 'ben', // ben Hebrew or forename Ben.
        '\bBat(?=\s+\w)' => 'bat', // bat Hebrew or forename Bat.
    ];

    // Spanish conjunctions.
    private const CONJUNCTIONS = ['Y', 'E', 'I'];

    // Roman letters regexp.
    private const ROMAN_REGEX = '\b((?:[Xx]{1,3}|[Xx][Ll]|[Ll][Xx]{0,3})?(?:[Ii]{1,3}|[Ii][VvXx]|[Vv][Ii]{0,3})?)\b';

    // Post nominal values.
    private const POST_NOMINALS = [
        'ACILEx', 'ACSM', 'ADC', 'AEPC', 'AFC', 'AFM', 'AICSM', 'AKC', 'AM', 'ARBRIBA', 'ARCS', 'ARRC', 'ARSM', 'AUH',
        'AUS',
        'BA', 'BArch', 'BCh', 'BChir', 'BCL', 'BDS', 'BEd', 'BEM', 'BEng', 'BM', 'BS', 'BSc', 'BSW', 'BVM&S',
        'BVScBVetMed',
        'CB', 'CBE', 'CEng', 'CertHE', 'CGC', 'CGM', 'CH', 'CIE', 'CMarEngCMarSci', 'CMarTech', 'CMG', 'CMILT',
        'CML', 'CPhT', 'CPLCTP', 'CPM', 'CQSW', 'CSciTeach', 'CSI', 'CTL', 'CVO',
        'DBE', 'DBEnv', 'DC', 'DCB', 'DCM', 'DCMG', 'DConstMgt', 'DCVO', 'DD', 'DEM', 'DFC', 'DFM', 'DIC', 'Dip',
        'DipHE', 'DipLP', 'DipSW', 'DL', 'DLitt', 'DLP', 'DPhil', 'DProf', 'DPT', 'DREst', 'DSC', 'DSM', 'DSO',
        'DSocSci',
        'ED', 'EdD', 'EJLog', 'EMLog', 'EN', 'EngD', 'EngTech', 'ERD', 'ESLog',
        'FADO', 'FAWM', 'FBDOFCOptom', 'FCEM', 'FCILEx', 'FCILT', 'FCSP.', 'FdAFdSc', 'FdEng', 'FFHOM', 'FFPM',
        'FRCAFFPMRCA', 'FRCGP', 'FRCOG', 'FRCP', 'FRCPsych', 'FRCS', 'FRCVS', 'FSCR.',
        'GBE', 'GC', 'GCB', 'GCIE', 'GCILEx', 'GCMG', 'GCSI', 'GCVO', 'GM',
        'HNC', 'HNCert', 'HND', 'HNDip',
        'ICTTech', 'IDSM', 'IEng', 'IMarEng', 'IOMCPM', 'ISO',
        'J', 'JP', 'JrLog',
        'KBE', 'KC', 'KCB', 'KCIE', 'KCMG', 'KCSI', 'KCVO', 'KG', 'KP', 'KT',
        'LFHOM', 'LG', 'LJ', 'LLB', 'LLD', 'LLM', 'Log', 'LPE', /* 'LT', - excluded, see initial names */
        'LVO',
        'MA', 'MAcc', 'MAnth', 'MArch', 'MarEngTech', 'MB', 'MBA', 'MBChB', 'MBE', 'MBEIOM', 'MBiochem', 'MC', 'MCEM',
        'MCGI', 'MCh.', 'MChem', 'MChiro', 'MClinRes', 'MComp', 'MCOptom', 'MCSM', 'MCSP', 'MD', 'MEarthSc',
        'MEng', 'MEnt', 'MEP', 'MFHOM', 'MFin', 'MFPM', 'MGeol', 'MILT', 'MJur', 'MLA', 'MLitt', 'MM', 'MMath',
        'MMathStat', 'MMORSE', 'MMus', 'MOst', 'MP', 'MPAMEd', 'MPharm', 'MPhil', 'MPhys', 'MRCGP', 'MRCOG',
        'MRCP', 'MRCPath', 'MRCPCHFRCPCH', 'MRCPsych', 'MRCS', 'MRCVS', 'MRes',
        /* 'MS', - excluded, see initial names */
        'MSc', 'MScChiro', 'MSci',
        'MSCR', 'MSM', 'MSocSc', 'MSP', 'MSt', 'MSW', 'MSYP', 'MVO',
        'NPQH',
        'OBE', 'OBI', 'OM', 'OND',
        'PgC', 'PGCAP', 'PGCE', 'PgCert', 'PGCHE', 'PgCLTHE', 'PgD', 'PGDE', 'PgDip', 'PhD', 'PLog', 'PLS',
        'QAM', 'QC', 'QFSM', 'QGM', 'QHC', 'QHDS', 'QHNS', 'QHP', 'QHS', 'QPM', 'QS', 'QTSCSci',
        'RD', 'RFHN', 'RGN', 'RHV', 'RIAI', 'RIAS', 'RM', 'RMN', 'RN', 'RN1RNA', 'RN2', 'RN3', 'RN4', 'RN5', 'RN6', 'RN7', 'RN8', 'RN9', 'RNC', 'RNLD', 'RNMH', 'ROH', 'RRC', 'RSAW', 'RSci', 'RSciTech', 'RSCN', 'RSN', 'RVM', 'RVN',
        'SCHM', 'SCJ', 'SCLD', 'SEN', 'SGM', 'SL', 'SPANSPMH', 'SPCC', 'SPCN', 'SPDN', 'SPHP', 'SPLD', 'SrLog', 'SRN', 'SROT',
        'TD',
        'UD',
        'V100', 'V200', 'V300', 'VC', 'VD', 'VetMB', 'VN', 'VRD'
    ];

    // Most two-letter words with no vowels should be kept in all caps as initials
    // (only matched when followed by whitespace).
    private const INITIAL_NAME_REGEX = '\b(Aj|[bcdfghjklmnpqrstvwxyzBCDFGHJKLMNPQRSTVWXYZ]{2})\s';

    // Two-letter words that must NOT be upper-cased as initials.
    private const INITIAL_NAME_EXCEPTIONS = [
        'Mr',
        'Ms', // Replaces Member of the Senedd post nominal.
        'Dr',
        'St',
        'Jr',
        'Sr',
        'Lt', // Replaces Lady of the Order of the Thistle post nominal.
    ];

    // Lowercase words
    private const LOWER_CASE_WORDS = ['The', 'Of', 'And'];

    // Excluded post-nominals
    private static $postNominalsExcluded = [];

    // Default options.
    private static $options = [
        'lazy'        => true,
        'irish'       => true,
        'spanish'     => false,
        'roman'       => true,
        'hebrew'      => true,
        'postnominal' => true,
    ];

    /**
     * Formatter constructor.
     *
     * Merges the given options into the shared (static) option set.
     *
     * @param array $options
     */
    public function __construct(array $options = [])
    {
        self::setOptions($options);
    }

    /**
     * Global options setter.
     *
     * Given keys override the current values; keys not given keep their value.
     *
     * @param array $options
     */
    public static function setOptions(array $options): void
    {
        self::$options = array_merge(self::$options, $options);
    }

    /**
     * Global post-nominals exclusions setter.
     *
     * A single string is treated as a one-element list. Values are appended to
     * the exclusions registered so far.
     *
     * @param array|string|null $values
     * @return boolean|void false when $values is neither a string nor an array; nothing otherwise.
     */
    public static function excludePostNominals($values)
    {
        if (is_string($values)) {
            $values = [$values];
        }

        if ( ! is_array($values)) {
            return false;
        }

        self::$postNominalsExcluded = array_merge(self::$postNominalsExcluded, $values);
    }

    /**
     * Main function for NameCase.
     *
     * @param string|null $name    Name to format; null is treated as ''.
     * @param array|null  $options Options merged into the global set; null is treated as [].
     *
     * @return string The formatted name, or the input unchanged when it is empty,
     *                skipped by the 'lazy' rule, or a replacement fails.
     */
    public static function nameCase(?string $name = '', ?array $options = []): string
    {
        $name = $name ?? '';

        // The signature allows null, but setOptions() only accepts an array.
        self::setOptions($options ?? []);

        // Do not do anything if string is mixed and lazy option is true.
        if ( ! self::canBeProcessed($name)) {
            return $name;
        }

        $original = $name;

        // Capitalize
        $name = self::capitalize($name);
        foreach (self::getReplacements() as $pattern => $replacement) {
            $name = mb_ereg_replace($pattern, $replacement, $name);

            // Very difficult to write a test in modern environments
            // @codeCoverageIgnoreStart
            if ( ! is_string($name)) {
                return $original;
            }
            // @codeCoverageIgnoreEnd
        }

        $name = self::correctInitialNames($name);
        $name = self::correctLowerCaseWords($name);

        return self::processOptions($name);
    }

    /**
     * Check if string can be processed.
     *
     * @param string $name
     *
     * @return bool false for an empty string, or for a mixed-case string while 'lazy' is on.
     */
    private static function canBeProcessed(string $name): bool
    {
        if ($name === '') {
            return false;
        }

        return ! (self::$options['lazy'] && self::skipMixed($name));
    }

    /**
     * Skip if string is mixed case.
     *
     * A string is NOT considered mixed when its first character is already
     * lowercase, or when the whole string is entirely lower or entirely upper.
     *
     * @param string $name Non-empty string.
     *
     * @return bool
     */
    private static function skipMixed(string $name): bool
    {
        // mb_substr() rather than $name[0]: the latter yields a single byte,
        // which is not a character when the name starts with a multibyte letter.
        $first = mb_substr($name, 0, 1);

        $firstLetterLower = $first === mb_strtolower($first);
        $allLowerOrUpper  = (mb_strtolower($name) === $name || mb_strtoupper($name) === $name);

        return ! ($firstLetterLower || $allLowerOrUpper);
    }

    /**
     * Capitalize first letters.
     *
     * Lowercases everything, upper-cases the first character of every word,
     * re-lowers a single letter following an apostrophe ('s), then applies the
     * Irish rules.
     *
     * @param string $name
     *
     * @return string
     */
    private static function capitalize(string $name): string
    {
        $name = mb_strtolower($name);

        $name = self::replaceCallback('\b\w', static function (array $matches): string {
            return mb_strtoupper($matches[0]);
        }, $name);

        // Lowercase 's
        $name = self::replaceCallback('\'\w\b', static function (array $matches): string {
            return mb_strtolower($matches[0]);
        }, $name);

        return self::updateIrish($name);
    }

    /**
     * Update for Irish names.
     *
     * @param string $name
     *
     * @return string
     */
    private static function updateIrish(string $name): string
    {
        if ( ! self::$options['irish']) {
            return $name;
        }

        // mb_ereg_match() only matches at the start of the string, hence the
        // leading '.*?' in both patterns.
        if (
            mb_ereg_match('.*?\bMac[A-Za-z]{2,}[^aciozj]\b', $name) ||
            mb_ereg_match('.*?\bMc', $name)
        ) {
            $name = self::updateMac($name);
        }

        return self::replace('Macmurdo', 'MacMurdo', $name);
    }

    /**
     * Updates irish Mac & Mc.
     *
     * Upper-cases the letter following every "Mac"/"Mc" prefix, then undoes
     * that for the names listed in EXCEPTIONS.
     *
     * @param string $name
     *
     * @return string
     */
    private static function updateMac(string $name): string
    {
        $name = self::replaceCallback('\b(Ma?c)([A-Za-z]+)', static function (array $matches): string {
            return $matches[1] . mb_strtoupper(mb_substr($matches[2], 0, 1)) . mb_substr($matches[2], 1);
        }, $name);

        // Now fix "Mac" exceptions
        foreach (self::EXCEPTIONS as $pattern => $replacement) {
            $name = self::replace($pattern, $replacement, $name);
        }

        return $name;
    }

    /**
     * Define required replacements.
     *
     * @return array pattern => replacement map built from the current options.
     */
    private static function getReplacements(): array
    {
        // General fixes
        $replacements = self::REPLACEMENTS;

        // With the 'spanish' option on, "El"/"La" keep their capital letter.
        if ( ! self::$options['spanish']) {
            $replacements = array_merge($replacements, self::SPANISH);
        }

        if (self::$options['hebrew']) {
            $replacements = array_merge($replacements, self::HEBREW);
        }

        return $replacements;
    }

    /**
     * Correct capitalization of initial names like JJ and TJ.
     *
     * @param string $name
     *
     * @return string
     */
    private static function correctInitialNames(string $name): string
    {
        return self::replaceCallback(self::INITIAL_NAME_REGEX, static function (array $matches): string {
            // $matches[0] includes the trailing whitespace; $matches[1] is the word itself.
            if (in_array($matches[1], self::INITIAL_NAME_EXCEPTIONS, true)) {
                return $matches[0];
            }

            return mb_strtoupper($matches[0]);
        }, $name);
    }

    /**
     * Correct lower-case words of titles.
     *
     * @param string $name
     *
     * @return string
     */
    private static function correctLowerCaseWords(string $name): string
    {
        foreach (self::LOWER_CASE_WORDS as $lowercase) {
            $name = self::replace('\b' . $lowercase . '\b', mb_strtolower($lowercase), $name);
        }

        return $name;
    }

    /**
     * Process options with given name
     *
     * @param string $name
     *
     * @return string
     */
    private static function processOptions(string $name): string
    {
        if (self::$options['roman']) {
            $name = self::updateRoman($name);
        }

        if (self::$options['spanish']) {
            $name = self::fixConjunction($name);
        }

        if (self::$options['postnominal']) {
            $name = self::fixPostNominal($name);
        }

        return $name;
    }

    /**
     * Fix roman numeral names.
     *
     * @param string $name
     *
     * @return string
     */
    private static function updateRoman(string $name): string
    {
        return self::replaceCallback(self::ROMAN_REGEX, static function (array $matches): string {
            return mb_strtoupper($matches[0]);
        }, $name);
    }

    /**
     * Fix Spanish conjunctions.
     *
     * @param string $name
     *
     * @return string
     */
    private static function fixConjunction(string $name): string
    {
        foreach (self::CONJUNCTIONS as $conjunction) {
            $name = self::replace('\b' . $conjunction . '\b', mb_strtolower($conjunction), $name);
        }

        return $name;
    }

    /**
     * Fix post-nominal letter cases.
     *
     * Each non-excluded post-nominal is matched case-insensitively as a whole
     * token and rewritten with its canonical casing.
     *
     * @param string $name
     * @return string
     */
    private static function fixPostNominal(string $name): string
    {
        $postNominals = array_diff(self::POST_NOMINALS, self::$postNominalsExcluded);

        foreach ($postNominals as $postNominal) {
            // Quote the entry so the '.' in 'FCSP.', 'FSCR.' and 'MCh.' is a literal dot and not
            // a wildcard. Lookarounds replace \b because \b can never match after a trailing
            // dot that is followed by a space or the end of the string; for entries that begin
            // and end with a word character they are equivalent to \b.
            $pattern = '(?<!\w)' . preg_quote($postNominal) . '(?!\w)';
            $name    = self::replace($pattern, $postNominal, $name, 'ix');
        }

        return $name;
    }

    /**
     * mb_ereg_replace() wrapper that always returns a string.
     *
     * @param string      $pattern
     * @param string      $replacement
     * @param string      $subject
     * @param string|null $options mb_ereg_replace() option string, or null for the default.
     *
     * @return string The replaced string, or $subject unchanged when mb_ereg_replace()
     *                does not return a string (false / null).
     */
    private static function replace(string $pattern, string $replacement, string $subject, ?string $options = null): string
    {
        // Only forward $options when given, so the extension's own default applies otherwise.
        $result = $options === null
            ? mb_ereg_replace($pattern, $replacement, $subject)
            : mb_ereg_replace($pattern, $replacement, $subject, $options);

        return is_string($result) ? $result : $subject;
    }

    /**
     * mb_ereg_replace_callback() wrapper that always returns a string.
     *
     * @param string   $pattern
     * @param callable $callback Receives the match array, returns the replacement.
     * @param string   $subject
     *
     * @return string The replaced string, or $subject unchanged when
     *                mb_ereg_replace_callback() does not return a string (false / null).
     */
    private static function replaceCallback(string $pattern, callable $callback, string $subject): string
    {
        $result = mb_ereg_replace_callback($pattern, $callback, $subject);

        return is_string($result) ? $result : $subject;
    }
}
||||
438 |