Issues (3)

src/Formatter.php (3 issues)

Labels
Severity
1
<?php namespace Tamtamchik\NameCase;
2
3
/**
4
 * Class Formatter.
5
 */
6
class Formatter
7
{
8
    // Irish exceptions.
9
    private const EXCEPTIONS = [
10
        '\bMacEdo' => 'Macedo',
11
        '\bMacEvicius' => 'Macevicius',
12
        '\bMacHado' => 'Machado',
13
        '\bMacHar' => 'Machar',
14
        '\bMacHin' => 'Machin',
15
        '\bMacHlin' => 'Machlin',
16
        '\bMacIas' => 'Macias',
17
        '\bMacIulis' => 'Maciulis',
18
        '\bMacKie' => 'Mackie',
19
        '\bMacKle' => 'Mackle',
20
        '\bMacKlin' => 'Macklin',
21
        '\bMacKmin' => 'Mackmin',
22
        '\bMacQuarie' => 'Macquarie',
23
        '\bMacOmber' => 'Macomber',
24
        '\bMacIn' => 'Macin',
25
        '\bMacKintosh' => 'Mackintosh',
26
        '\bMacKen' => 'Macken',
27
        '\bMacHen' => 'Machen',
28
        '\bMacisaac' => 'MacIsaac',
29
        '\bMacHiel' => 'Machiel',
30
        '\bMacIol' => 'Maciol',
31
        '\bMacKell' => 'Mackell',
32
        '\bMacKlem' => 'Macklem',
33
        '\bMacKrell' => 'Mackrell',
34
        '\bMacLin' => 'Maclin',
35
        '\bMacKey' => 'Mackey',
36
        '\bMacKley' => 'Mackley',
37
        '\bMacHell' => 'Machell',
38
        '\bMacHon' => 'Machon',
39
    ];
40
41
    // General replacements.
42
    private const REPLACEMENTS = [
43
        '\bAl(?=\s+\w)' => 'al',        // al Arabic or forename Al.
44
        '\bAp\b' => 'ap',        // ap Welsh.
45
        '\b(Bin|Binti|Binte)\b' => 'bin',       // bin, binti, binte Arabic.
46
        '\bDell([ae])\b' => 'dell\1',    // della and delle Italian.
47
        '\bD([aeiou])\b' => 'd\1',       // da, de, di Italian; du French; do Brasil.
48
        '\bD([ao]s)\b' => 'd\1',       // das, dos Brasileiros.
49
        '\bDe([lrn])\b' => 'de\1',      // del Italian; der/den Dutch/Flemish.
50
        '\bL([eo])\b' => 'l\1',       // lo Italian; le French.
51
        '\bTe([rn])\b' => 'te\1',      // ten, ter Dutch/Flemish.
52
        '\bVan(?=\s+\w)' => 'van',       // van Dutch/Flemish or forename Van.
53
        '\bVon\b' => 'von',       // von German.
54
    ];
55
56
    private const SPANISH = [
57
        '\bEl\b' => 'el',        // el Greek or El Spanish.
58
        '\bLa\b' => 'la',        // la French or La Spanish.
59
    ];
60
61
    private const HEBREW = [
62
        '\bBen(?=\s+\w)' => 'ben', // ben Hebrew or forename Ben.
63
        '\bBat(?=\s+\w)' => 'bat', // bat Hebrew or forename Bat.
64
    ];
65
66
    // Spanish conjunctions.
67
    private const CONJUNCTIONS = ['Y', 'E', 'I'];
68
69
    // Roman letters regexp.
70
    private const ROMAN_REGEX = '\b((?:[Xx]{1,3}|[Xx][Ll]|[Ll][Xx]{0,3})?(?:[Ii]{1,3}|[Ii][VvXx]|[Vv][Ii]{0,3})?)\b';
71
72
    // Post nominal values.
73
    private const POST_NOMINALS = [
74
        'ACILEx', 'ACSM', 'ADC', 'AEPC', 'AFC', 'AFM', 'AICSM', 'AKC', 'AM', 'ARBRIBA', 'ARCS', 'ARRC', 'ARSM', 'AUH',
75
        'AUS',
76
        'BA', 'BArch', 'BCh', 'BChir', 'BCL', 'BDS', 'BEd', 'BEM', 'BEng', 'BM', 'BS', 'BSc', 'BSW', 'BVM&S',
77
        'BVScBVetMed',
78
        'CB', 'CBE', 'CEng', 'CertHE', 'CGC', 'CGM', 'CH', 'CIE', 'CMarEngCMarSci', 'CMarTech', 'CMG', 'CMILT',
79
        'CML', 'CPhT', 'CPLCTP', 'CPM', 'CQSW', 'CSciTeach', 'CSI', 'CTL', 'CVO',
80
        'DBE', 'DBEnv', 'DC', 'DCB', 'DCM', 'DCMG', 'DConstMgt', 'DCVO', 'DD', 'DEM', 'DFC', 'DFM', 'DIC', 'Dip',
81
        'DipHE', 'DipLP', 'DipSW', 'DL', 'DLitt', 'DLP', 'DPhil', 'DProf', 'DPT', 'DREst', 'DSC', 'DSM', 'DSO',
82
        'DSocSci',
83
        'ED', 'EdD', 'EJLog', 'EMLog', 'EN', 'EngD', 'EngTech', 'ERD', 'ESLog',
84
        'FADO', 'FAWM', 'FBDOFCOptom', 'FCEM', 'FCILEx', 'FCILT', 'FCSP.', 'FdAFdSc', 'FdEng', 'FFHOM', 'FFPM',
85
        'FRCAFFPMRCA', 'FRCGP', 'FRCOG', 'FRCP', 'FRCPsych', 'FRCS', 'FRCVS', 'FSCR.',
86
        'GBE', 'GC', 'GCB', 'GCIE', 'GCILEx', 'GCMG', 'GCSI', 'GCVO', 'GM',
87
        'HNC', 'HNCert', 'HND', 'HNDip',
88
        'ICTTech', 'IDSM', 'IEng', 'IMarEng', 'IOMCPM', 'ISO',
89
        'J', 'JP', 'JrLog',
90
        'KBE', 'KC', 'KCB', 'KCIE', 'KCMG', 'KCSI', 'KCVO', 'KG', 'KP', 'KT',
91
        'LFHOM', 'LG', 'LJ', 'LLB', 'LLD', 'LLM', 'Log', 'LPE', /* 'LT', - excluded, see initial names */
92
        'LVO',
93
        'MA', 'MAcc', 'MAnth', 'MArch', 'MarEngTech', 'MB', 'MBA', 'MBChB', 'MBE', 'MBEIOM', 'MBiochem', 'MC', 'MCEM',
94
        'MCGI', 'MCh.', 'MChem', 'MChiro', 'MClinRes', 'MComp', 'MCOptom', 'MCSM', 'MCSP', 'MD', 'MEarthSc',
95
        'MEng', 'MEnt', 'MEP', 'MFHOM', 'MFin', 'MFPM', 'MGeol', 'MILT', 'MJur', 'MLA', 'MLitt', 'MM', 'MMath',
96
        'MMathStat', 'MMORSE', 'MMus', 'MOst', 'MP', 'MPAMEd', 'MPharm', 'MPhil', 'MPhys', 'MRCGP', 'MRCOG',
97
        'MRCP', 'MRCPath', 'MRCPCHFRCPCH', 'MRCPsych', 'MRCS', 'MRCVS', 'MRes',
98
        /* 'MS', - excluded, see initial names */
99
        'MSc', 'MScChiro', 'MSci',
100
        'MSCR', 'MSM', 'MSocSc', 'MSP', 'MSt', 'MSW', 'MSYP', 'MVO',
101
        'NPQH',
102
        'OBE', 'OBI', 'OM', 'OND',
103
        'PgC', 'PGCAP', 'PGCE', 'PgCert', 'PGCHE', 'PgCLTHE', 'PgD', 'PGDE', 'PgDip', 'PhD', 'PLog', 'PLS',
104
        'QAM', 'QC', 'QFSM', 'QGM', 'QHC', 'QHDS', 'QHNS', 'QHP', 'QHS', 'QPM', 'QS', 'QTSCSci',
105
        'RD', 'RFHN', 'RGN', 'RHV', 'RIAI', 'RIAS', 'RM', 'RMN', 'RN', 'RN1RNA', 'RN2', 'RN3', 'RN4', 'RN5', 'RN6', 'RN7', 'RN8', 'RN9', 'RNC', 'RNLD', 'RNMH', 'ROH', 'RRC', 'RSAW', 'RSci', 'RSciTech', 'RSCN', 'RSN', 'RVM', 'RVN',
106
        'SCHM', 'SCJ', 'SCLD', 'SEN', 'SGM', 'SL', 'SPANSPMH', 'SPCC', 'SPCN', 'SPDN', 'SPHP', 'SPLD', 'SrLog', 'SRN', 'SROT',
107
        'TD',
108
        'UD',
109
        'V100', 'V200', 'V300', 'VC', 'VD', 'VetMB', 'VN', 'VRD'
110
    ];
111
112
    // Initial names regexp: "Aj" or any two non-vowel letters followed by whitespace.
113
    private const INITIAL_NAME_REGEX = '\b(Aj|[bcdfghjklmnpqrstvwxyzBCDFGHJKLMNPQRSTVWXYZ]{2})\s';
114
115
    // Most two-letter words with no vowels are kept in all caps as initials; these are the exceptions.
116
    private const INITIAL_NAME_EXCEPTIONS = [
117
        'Mr',
118
        'Ms', // Replaces Member of the Senedd post nominal.
119
        'Dr',
120
        'St',
121
        'Jr',
122
        'Sr',
123
        'Lt', // Replaces Lady of the Order of the Thistle post nominal.
124
    ];
125
    private const LOWER_CASE_WORDS = ['The', 'Of', 'And'];
126
127
    // Post-nominals excluded from correction (filled by excludePostNominals()).
128
    private static $postNominalsExcluded = [];
129
130
    // Default options.
131
    private static $options = [
132
        'lazy' => true,
133
        'irish' => true,
134
        'spanish' => false,
135
        'roman' => true,
136
        'hebrew' => true,
137
        'postnominal' => true,
138
    ];
139
140
    /**
     * Create a formatter and apply the given options.
     *
     * The options are merged into the class-wide static option set, so they
     * remain in effect for later static calls as well.
     *
     * @param array $options Option overrides keyed by option name.
     */
    public function __construct(array $options = [])
    {
        static::setOptions($options);
    }
149
150
    /**
     * Merge option overrides into the global (static) option set.
     *
     * Values in $options win over the currently stored values for the same key;
     * keys that are not supplied keep their current value.
     *
     * @param array $options Option overrides keyed by option name.
     */
    public static function setOptions(array $options): void
    {
        $merged = array_merge(self::$options, $options);

        self::$options = $merged;
    }
159
160
    /**
     * Add post-nominals to the global exclusion list.
     *
     * A single string is treated as a one-element list. Anything that is
     * neither a string nor an array is rejected.
     *
     * @param array|string|null $values Post-nominal(s) to exclude.
     * @return boolean|void False when $values is neither string nor array; nothing otherwise.
     */
    public static function excludePostNominals($values)
    {
        $list = is_string($values) ? [$values] : $values;

        if (is_array($list)) {
            self::$postNominalsExcluded = array_merge(self::$postNominalsExcluded, $list);

            return;
        }

        return false;
    }
178
179
    /**
     * Main function for NameCase.
     *
     * Applies the supplied options (they are merged into the class-wide static
     * options and therefore persist), then capitalizes the name and runs the
     * replacement tables and option-dependent corrections over it.
     *
     * The name is returned unchanged when it is empty, or when the "lazy"
     * option is on and the name is considered mixed case.
     *
     * @param string|null $name Name to format; null is treated as an empty string.
     * @param array|null $options Option overrides; null is treated as no overrides.
     *
     * @return string
     */
    public static function nameCase(?string $name = '', ?array $options = []): string
    {
        $name = $name ?? '';

        // The signature accepts null, but setOptions() requires an array.
        self::setOptions($options ?? []);

        // Do not do anything if string is mixed and lazy option is true.
        if ( ! self::canBeProcessed($name)) {
            return $name;
        }

        $original = $name;

        // Capitalize
        $name = self::capitalize($name);
        foreach (self::getReplacements() as $pattern => $replacement) {
            $name = mb_ereg_replace($pattern, $replacement, $name);

            // Very difficult to write a test in modern environments
            // @codeCoverageIgnoreStart
            if ( ! is_string($name)) {
                // A replacement failed: fall back to the untouched input.
                return $original;
            }
            // @codeCoverageIgnoreEnd
        }

        $name = self::correctInitialNames($name);
        $name = self::correctLowerCaseWords($name);

        return self::processOptions($name);
    }
218
219
    /**
     * Decide whether a name should be processed at all.
     *
     * An empty string is never processed. Otherwise the name is processed
     * unless the "lazy" option is on and the name is detected as mixed case.
     *
     * @param string $name
     *
     * @return bool
     */
    private static function canBeProcessed(string $name): bool
    {
        if ($name === '') {
            return false;
        }

        return ! self::$options['lazy'] || ! self::skipMixed($name);
    }
234
235
    /**
     * Skip if string is mixed case.
     *
     * A name is NOT considered mixed (returns false) when its first character
     * equals its own lowercase form, or when the whole string is entirely
     * lowercase or entirely uppercase.
     *
     * @param string $name
     *
     * @return bool True when the name is mixed case and should be skipped.
     */
    private static function skipMixed(string $name): bool
    {
        // Take the first character rather than the first byte: $name[0] would
        // cut a multibyte character (e.g. "é") into an invalid partial byte.
        $firstChar = mb_substr($name, 0, 1);

        $firstLetterLower = $firstChar === mb_strtolower($firstChar);
        $allLowerOrUpper = mb_strtolower($name) === $name || mb_strtoupper($name) === $name;

        return ! ($firstLetterLower || $allLowerOrUpper);
    }
249
250
    /**
     * Lowercase the whole name, then upper-case the first character of every word.
     *
     * A single word character directly after an apostrophe (as in 's) is put
     * back to lowercase, and the result is passed through the Irish-name update.
     *
     * @param string $name
     *
     * @return string
     */
    private static function capitalize(string $name): string
    {
        $lowered = mb_strtolower($name);

        // Upper-case the character at the start of each word.
        $capitalized = mb_ereg_replace_callback('\b\w', static function ($hit) {
            return mb_strtoupper($hit[0]);
        }, $lowered);

        // Lowercase 's
        $apostropheFixed = mb_ereg_replace_callback('\'\w\b', static function ($hit) {
            return mb_strtolower($hit[0]);
        }, $capitalized);

        return self::updateIrish($apostropheFixed);
    }
272
273
    /**
     * Apply Mac/Mc capitalization when the "irish" option is enabled.
     *
     * With the option off the name is returned untouched. With it on, names
     * matching either Mac/Mc pattern go through updateMac(), and "Macmurdo" is
     * always rewritten to "MacMurdo".
     *
     * @param string $name
     *
     * @return string
     */
    private static function updateIrish(string $name): string
    {
        if (self::$options['irish']) {
            // The patterns start with ".*?" so a Mac/Mc word anywhere in the name is found.
            $hasMacOrMc = mb_ereg_match('.*?\bMac[A-Za-z]{2,}[^aciozj]\b', $name)
                || mb_ereg_match('.*?\bMc', $name);

            if ($hasMacOrMc) {
                $name = self::updateMac($name);
            }

            return mb_ereg_replace('Macmurdo', 'MacMurdo', $name);
        }

        return $name;
    }
293
294
    /**
     * Capitalize the letter following a leading "Mac" or "Mc", then apply the
     * EXCEPTIONS table to the result.
     *
     * @param string $name
     *
     * @return string
     */
    private static function updateMac(string $name): string
    {
        $result = mb_ereg_replace_callback('\b(Ma?c)([A-Za-z]+)', static function ($parts) {
            $prefix = $parts[1];
            $rest = $parts[2];

            // Upper-case only the first character that follows the prefix.
            return $prefix . mb_strtoupper(mb_substr($rest, 0, 1)) . mb_substr($rest, 1);
        }, $name);

        // Now fix "Mac" exceptions
        foreach (self::EXCEPTIONS as $search => $fixed) {
            $result = mb_ereg_replace($search, $fixed, $result);
        }

        return $result;
    }
314
315
    /**
     * Build the pattern => replacement table for the current options.
     *
     * The general REPLACEMENTS are always included. The SPANISH set is added
     * only when the "spanish" option is off, and the HEBREW set only when the
     * "hebrew" option is on.
     *
     * @return array
     */
    private static function getReplacements(): array
    {
        // General fixes
        $sets = [self::REPLACEMENTS];

        if ( ! self::$options['spanish']) {
            $sets[] = self::SPANISH;
        }

        if (self::$options['hebrew']) {
            $sets[] = self::HEBREW;
        }

        return array_merge(...$sets);
    }
334
335
    /**
     * Upper-case initial names like JJ and TJ.
     *
     * Every match of INITIAL_NAME_REGEX is upper-cased, unless the matched
     * letters are listed in INITIAL_NAME_EXCEPTIONS, in which case the match
     * is left as it is.
     *
     * @param string $name
     *
     * @return string
     */
    private static function correctInitialNames(string $name): string
    {
        return mb_ereg_replace_callback(self::INITIAL_NAME_REGEX, static function ($hit) {
            $isException = in_array($hit[1], self::INITIAL_NAME_EXCEPTIONS, true);

            return $isException ? $hit[0] : mb_strtoupper($hit[0]);
        }, $name);
    }
354
355
    /**
     * Correct lower-case words of titles.
     *
     * Each word listed in LOWER_CASE_WORDS is replaced by its lowercase form
     * wherever it appears as a whole word.
     *
     * @param string $name
     *
     * @return string
     */
    private static function correctLowerCaseWords(string $name): string
    {
        foreach (self::LOWER_CASE_WORDS as $lowercase) {
            $replaced = mb_ereg_replace('\b' . $lowercase . '\b', mb_strtolower($lowercase), $name);

            // mb_ereg_replace() can return a non-string on failure; keep the
            // last good value so the next pass and the return stay strings.
            if (is_string($replaced)) {
                $name = $replaced;
            }
        }

        return $name;
    }
369
370
    /**
     * Run the option-dependent corrections over the name.
     *
     * In order: roman numerals ("roman"), Spanish conjunctions ("spanish") and
     * post-nominal letters ("postnominal"). A step is skipped when its option
     * is off.
     *
     * @param string $name
     *
     * @return string
     */
    private static function processOptions(string $name): string
    {
        $name = self::$options['roman'] ? self::updateRoman($name) : $name;
        $name = self::$options['spanish'] ? self::fixConjunction($name) : $name;
        $name = self::$options['postnominal'] ? self::fixPostNominal($name) : $name;

        return $name;
    }
393
394
    /**
     * Upper-case everything matched by ROMAN_REGEX (roman numeral names).
     *
     * @param string $name
     *
     * @return string
     */
    private static function updateRoman(string $name): string
    {
        $toUpper = static function ($hit) {
            return mb_strtoupper($hit[0]);
        };

        return mb_ereg_replace_callback(self::ROMAN_REGEX, $toUpper, $name);
    }
407
408
    /**
     * Fix Spanish conjunctions.
     *
     * Each entry of CONJUNCTIONS that stands alone as a word is replaced by
     * its lowercase form.
     *
     * @param string $name
     *
     * @return string
     */
    private static function fixConjunction(string $name): string
    {
        foreach (self::CONJUNCTIONS as $conjunction) {
            $replaced = mb_ereg_replace('\b' . $conjunction . '\b', mb_strtolower($conjunction), $name);

            // mb_ereg_replace() can return a non-string on failure; keep the
            // last good value so the next pass and the return stay strings.
            if (is_string($replaced)) {
                $name = $replaced;
            }
        }

        return $name;
    }
422
423
    /**
     * Fix post-nominal letter cases.
     *
     * Every entry of POST_NOMINALS that is not in the exclusion list is
     * matched case-insensitively as a whole word and rewritten with the exact
     * casing stored in the list.
     *
     * @param string $name
     * @return string
     */
    private static function fixPostNominal(string $name): string
    {
        $postNominals = array_diff(self::POST_NOMINALS, self::$postNominalsExcluded);

        foreach ($postNominals as $postNominal) {
            // NOTE(review): entries are placed into the pattern unescaped, so the
            // "." in entries such as 'MCh.' acts as a regex wildcard - confirm intent.
            $replaced = mb_ereg_replace('\b' . $postNominal . '\b', $postNominal, $name, 'ix');

            // mb_ereg_replace() can return a non-string on failure; keep the
            // last good value so the next pass and the return stay strings.
            if (is_string($replaced)) {
                $name = $replaced;
            }
        }

        return $name;
    }
437
}
438