Passed
Push — master ( 2becaa...9477b6 )
by Yuri
11:28
created

Formatter::excludePostNominals()   A

Complexity

Conditions 3
Paths 4

Size

Total Lines 11
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 5
CRAP Score 3.0416

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 3
eloc 5
c 1
b 0
f 0
nc 4
nop 1
dl 0
loc 11
ccs 5
cts 6
cp 0.8333
crap 3.0416
rs 10
1
<?php namespace Tamtamchik\NameCase;
2
3
/**
 * Class Formatter.
 *
 * Normalises the letter case of personal names (e.g. "KEITH" => "Keith",
 * "macdonald" => "MacDonald") using multibyte regular expressions.
 *
 * Options and post-nominal exclusions are kept in static properties, so they
 * are shared by every caller and persist between calls.
 */
class Formatter
{
    // Irish exceptions: "Mac" names whose next letter must stay lowercase
    // (plus MacIsaac, which needs its capital restored).
    private const EXCEPTIONS = [
        '\bMacEdo' => 'Macedo',
        '\bMacEvicius' => 'Macevicius',
        '\bMacHado' => 'Machado',
        '\bMacHar' => 'Machar',
        '\bMacHin' => 'Machin',
        '\bMacHlin' => 'Machlin',
        '\bMacIas' => 'Macias',
        '\bMacIulis' => 'Maciulis',
        '\bMacKie' => 'Mackie',
        '\bMacKle' => 'Mackle',
        '\bMacKlin' => 'Macklin',
        '\bMacKmin' => 'Mackmin',
        '\bMacQuarie' => 'Macquarie',
        '\bMacOmber' => 'Macomber',
        '\bMacIn' => 'Macin',
        '\bMacKintosh' => 'Mackintosh',
        '\bMacKen' => 'Macken',
        '\bMacHen' => 'Machen',
        '\bMacisaac' => 'MacIsaac',
        '\bMacHiel' => 'Machiel',
        '\bMacIol' => 'Maciol',
        '\bMacKell' => 'Mackell',
        '\bMacKlem' => 'Macklem',
        '\bMacKrell' => 'Mackrell',
        '\bMacLin' => 'Maclin',
        '\bMacKey' => 'Mackey',
        '\bMacKley' => 'Mackley',
        '\bMacHell' => 'Machell',
        '\bMacHon' => 'Machon',
    ];

    // General replacements (pattern => replacement), always applied.
    private const REPLACEMENTS = [
        '\bAl(?=\s+\w)' => 'al',                // al Arabic or forename Al.
        '\bAp\b' => 'ap',                       // ap Welsh.
        '\b(Bin|Binti|Binte)\b' => 'bin',       // bin, binti, binte Arabic.
        '\bDell([ae])\b' => 'dell\1',           // della and delle Italian.
        '\bD([aeiou])\b' => 'd\1',              // da, de, di Italian; du French; do Brasil.
        '\bD([ao]s)\b' => 'd\1',                // das, dos Brasileiros.
        '\bDe([lrn])\b' => 'de\1',              // del Italian; der/den Dutch/Flemish.
        '\bL([eo])\b' => 'l\1',                 // lo Italian; le French.
        '\bTe([rn])\b' => 'te\1',               // ten, ter Dutch/Flemish.
        '\bVan(?=\s+\w)' => 'van',              // van Dutch/Flemish or forename Van.
        '\bVon\b' => 'von',                     // von German.
    ];

    // Applied only when the "spanish" option is OFF (see getReplacements()).
    private const SPANISH = [
        '\bEl\b' => 'el',        // el Greek or El Spanish.
        '\bLa\b' => 'la',        // la French or La Spanish.
    ];

    // Applied only when the "hebrew" option is ON.
    private const HEBREW = [
        '\bBen(?=\s+\w)' => 'ben', // ben Hebrew or forename Ben.
        '\bBat(?=\s+\w)' => 'bat', // bat Hebrew or forename Bat.
    ];

    // Spanish conjunctions.
    private const CONJUNCTIONS = ['Y', 'E', 'I'];

    // Roman letters regexp.
    private const ROMAN_REGEX = '\b((?:[Xx]{1,3}|[Xx][Ll]|[Ll][Xx]{0,3})?(?:[Ii]{1,3}|[Ii][VvXx]|[Vv][Ii]{0,3})?)\b';

    // Post nominal values.
    private const POST_NOMINALS = [
        'ACILEx', 'ACSM', 'ADC', 'AEPC', 'AFC', 'AFM', 'AICSM', 'AKC', 'AM', 'ARBRIBA', 'ARCS', 'ARRC', 'ARSM', 'AUH',
        'AUS',
        'BA', 'BArch', 'BCh', 'BChir', 'BCL', 'BDS', 'BEd', 'BEM', 'BEng', 'BM', 'BS', 'BSc', 'BSW', 'BVM&S',
        'BVScBVetMed',
        'CB', 'CBE', 'CEng', 'CertHE', 'CGC', 'CGM', 'CH', 'CIE', 'CMarEngCMarSci', 'CMarTech', 'CMG', 'CMILT',
        'CML', 'CPhT', 'CPLCTP', 'CPM', 'CQSW', 'CSciTeach', 'CSI', 'CTL', 'CVO',
        'DBE', 'DBEnv', 'DC', 'DCB', 'DCM', 'DCMG', 'DConstMgt', 'DCVO', 'DD', 'DEM', 'DFC', 'DFM', 'DIC', 'Dip',
        'DipHE', 'DipLP', 'DipSW', 'DL', 'DLitt', 'DLP', 'DPhil', 'DProf', 'DPT', 'DREst', 'DSC', 'DSM', 'DSO',
        'DSocSci',
        'ED', 'EdD', 'EJLog', 'EMLog', 'EN', 'EngD', 'EngTech', 'ERD', 'ESLog',
        'FADO', 'FAWM', 'FBDOFCOptom', 'FCEM', 'FCILEx', 'FCILT', 'FCSP.', 'FdAFdSc', 'FdEng', 'FFHOM', 'FFPM',
        'FRCAFFPMRCA', 'FRCGP', 'FRCOG', 'FRCP', 'FRCPsych', 'FRCS', 'FRCVS', 'FSCR.',
        'GBE', 'GC', 'GCB', 'GCIE', 'GCILEx', 'GCMG', 'GCSI', 'GCVO', 'GM',
        'HNC', 'HNCert', 'HND', 'HNDip',
        'ICTTech', 'IDSM', 'IEng', 'IMarEng', 'IOMCPM', 'ISO',
        'J', 'JP', 'JrLog',
        'KBE', 'KC', 'KCB', 'KCIE', 'KCMG', 'KCSI', 'KCVO', 'KG', 'KP', 'KT',
        'LFHOM', 'LG', 'LJ', 'LLB', 'LLD', 'LLM', 'Log', 'LPE', /* 'LT', - excluded, see initial names */
        'LVO',
        'MA', 'MAcc', 'MAnth', 'MArch', 'MarEngTech', 'MB', 'MBA', 'MBChB', 'MBE', 'MBEIOM', 'MBiochem', 'MC', 'MCEM',
        'MCGI', 'MCh.', 'MChem', 'MChiro', 'MClinRes', 'MComp', 'MCOptom', 'MCSM', 'MCSP', 'MD', 'MEarthSc',
        'MEng', 'MEnt', 'MEP', 'MFHOM', 'MFin', 'MFPM', 'MGeol', 'MILT', 'MJur', 'MLA', 'MLitt', 'MM', 'MMath',
        'MMathStat', 'MMORSE', 'MMus', 'MOst', 'MP', 'MPAMEd', 'MPharm', 'MPhil', 'MPhys', 'MRCGP', 'MRCOG',
        'MRCP', 'MRCPath', 'MRCPCHFRCPCH', 'MRCPsych', 'MRCS', 'MRCVS', 'MRes',
        /* 'MS', - excluded, see initial names */
        'MSc', 'MScChiro', 'MSci',
        'MSCR', 'MSM', 'MSocSc', 'MSP', 'MSt', 'MSW', 'MSYP', 'MVO',
        'NPQH',
        'OBE', 'OBI', 'OM', 'OND',
        'PgC', 'PGCAP', 'PGCE', 'PgCert', 'PGCHE', 'PgCLTHE', 'PgD', 'PGDE', 'PgDip', 'PhD', 'PLog', 'PLS',
        'QAM', 'QC', 'QFSM', 'QGM', 'QHC', 'QHDS', 'QHNS', 'QHP', 'QHS', 'QPM', 'QS', 'QTSCSci',
        'RD', 'RFHN', 'RGN', 'RHV', 'RIAI', 'RIAS', 'RM', 'RMN', 'RN', 'RN1RNA', 'RN2', 'RN3', 'RN4', 'RN5', 'RN6',
        'RN7', 'RN8', 'RN9', 'RNC', 'RNLD', 'RNMH', 'ROH', 'RRC', 'RSAW', 'RSci', 'RSciTech', 'RSCN', 'RSN', 'RVM',
        'RVN',
        'SCHM', 'SCJ', 'SCLD', 'SEN', 'SGM', 'SL', 'SPANSPMH', 'SPCC', 'SPCN', 'SPDN', 'SPHP', 'SPLD', 'SrLog', 'SRN',
        'SROT',
        'TD',
        'UD',
        'V100', 'V200', 'V300', 'VC', 'VD', 'VetMB', 'VN', 'VRD',
    ];

    // Most two-letter words with no vowels (and "Aj") should be kept in all caps as initials.
    // The trailing \s means only initials followed by whitespace are matched.
    private const INITIAL_NAME_REGEX = '\b(Aj|[bcdfghjklmnpqrstvwxyzBCDFGHJKLMNPQRSTVWXYZ]{2})\s';

    // Two-letter words that match INITIAL_NAME_REGEX but must NOT be upper-cased.
    private const INITIAL_NAME_EXCEPTIONS = [
        'Mr',
        'Ms', // Replaces Member of the Senedd post nominal.
        'Dr',
        'St',
        'Jr',
        'Sr',
        'Lt', // Replaces Lady of the Order of the Thistle post nominal.
    ];

    // Lowercase words.
    private const LOWER_CASE_WORDS = ['The', 'Of', 'And'];

    // Post-nominals excluded from correction (filled by excludePostNominals()).
    private static $postNominalsExcluded = [];

    // Default options.
    private static $options = [
        'lazy' => true,
        'irish' => true,
        'spanish' => false,
        'roman' => true,
        'hebrew' => true,
        'postnominal' => true,
    ];

    /**
     * Formatter constructor.
     *
     * Merges the given options into the shared static option set.
     *
     * @param array $options
     */
    public function __construct(array $options = [])
    {
        self::setOptions($options);
    }

    /**
     * Global options setter.
     *
     * The given keys override the current values; keys that are not given keep
     * their current value. The result is stored statically and therefore
     * affects all later calls.
     *
     * @param array $options
     */
    public static function setOptions(array $options): void
    {
        self::$options = array_merge(self::$options, $options);
    }

    /**
     * Global post-nominals exclusions setter.
     *
     * Appends the given value(s) to the static exclusion list consulted by
     * the post-nominal correction step.
     *
     * @param array|string|null $values A single post-nominal or a list of them.
     *
     * @return boolean|void false when $values is neither a string nor an array;
     *                      nothing otherwise.
     */
    public static function excludePostNominals($values)
    {
        if (is_string($values)) {
            $values = [$values];
        }

        if ( ! is_array($values)) {
            return false;
        }

        self::$postNominalsExcluded = array_merge(self::$postNominalsExcluded, $values);
    }

    /**
     * Main function for NameCase.
     *
     * @param string|null $name    Name to fix; null is treated as an empty string.
     * @param array|null  $options Option overrides, merged into the shared static
     *                             options; null is treated as "no overrides".
     *
     * @return string The corrected name, or the input unchanged when it is empty,
     *                skipped by the "lazy" option, or a replacement step fails.
     */
    public static function nameCase(?string $name = '', ?array $options = []): string
    {
        $name = $name ?? '';

        // The signature allows null, but setOptions() requires an array.
        self::setOptions($options ?? []);

        // Do not do anything if string is mixed and lazy option is true.
        if ( ! self::canBeProcessed($name)) {
            return $name;
        }

        $original = $name;

        // Capitalize
        $name = self::capitalize($name);
        foreach (self::getReplacements() as $pattern => $replacement) {
            $name = mb_ereg_replace($pattern, $replacement, $name);
            // mb_ereg_replace() yields a non-string on failure: give the input back untouched.
            if ( ! is_string($name)) {
                return $original;
            }
        }

        $name = self::correctInitialNames($name);
        $name = self::correctLowerCaseWords($name);

        return self::processOptions($name);
    }

    /**
     * Check if string can be processed.
     *
     * @param string $name
     *
     * @return bool false for an empty string, or for mixed-case input while the
     *              "lazy" option is on; true otherwise.
     */
    private static function canBeProcessed(string $name): bool
    {
        if ($name === '') {
            return false;
        }

        return ! (self::$options['lazy'] && self::skipMixed($name));
    }

    /**
     * Skip if string is mixed case.
     *
     * A string counts as "mixed" when its first character is not lowercase and
     * the whole string is neither entirely lowercase nor entirely uppercase.
     *
     * @param string $name Non-empty name.
     *
     * @return bool
     */
    private static function skipMixed(string $name): bool
    {
        // Take the first CHARACTER, not the first byte, so multibyte letters are compared correctly.
        $firstLetter = mb_substr($name, 0, 1);
        $firstLetterLower = $firstLetter === mb_strtolower($firstLetter);
        $allLowerOrUpper = (mb_strtolower($name) === $name || mb_strtoupper($name) === $name);

        return ! ($firstLetterLower || $allLowerOrUpper);
    }

    /**
     * Capitalize first letters.
     *
     * Lower-cases the whole string, upper-cases the first character of every
     * word, lower-cases a single word character that follows an apostrophe at
     * the end of a word (the "s" in "'s"), then applies the Irish rules.
     *
     * @param string $name
     *
     * @return string
     */
    private static function capitalize(string $name): string
    {
        $name = mb_strtolower($name);

        $name = self::replaceCallback('\b\w', static function (array $matches): string {
            return mb_strtoupper($matches[0]);
        }, $name);

        // Lowercase 's
        $name = self::replaceCallback('\'\w\b', static function (array $matches): string {
            return mb_strtolower($matches[0]);
        }, $name);

        return self::updateIrish($name);
    }

    /**
     * Update for Irish names.
     *
     * Does nothing when the "irish" option is off.
     *
     * @param string $name
     *
     * @return string
     */
    private static function updateIrish(string $name): string
    {
        if ( ! self::$options['irish']) {
            return $name;
        }

        // mb_ereg_match() anchors at the start of the string, hence the leading ".*?".
        if (
            mb_ereg_match('.*?\bMac[A-Za-z]{2,}[^aciozj]\b', $name) ||
            mb_ereg_match('.*?\bMc', $name)
        ) {
            $name = self::updateMac($name);
        }

        return self::replace('Macmurdo', 'MacMurdo', $name);
    }

    /**
     * Updates irish Mac & Mc.
     *
     * Upper-cases the letter that follows a leading "Mac"/"Mc", then reverts
     * the names listed in EXCEPTIONS.
     *
     * @param string $name
     *
     * @return string
     */
    private static function updateMac(string $name): string
    {
        $name = self::replaceCallback('\b(Ma?c)([A-Za-z]+)', static function (array $matches): string {
            return $matches[1] . mb_strtoupper(mb_substr($matches[2], 0, 1)) . mb_substr($matches[2], 1);
        }, $name);

        // Now fix "Mac" exceptions
        foreach (self::EXCEPTIONS as $pattern => $replacement) {
            $name = self::replace($pattern, $replacement, $name);
        }

        return $name;
    }

    /**
     * Define required replacements.
     *
     * @return array Pattern => replacement map: the general set, plus the
     *               El/La rules when "spanish" is off, plus the ben/bat rules
     *               when "hebrew" is on.
     */
    private static function getReplacements(): array
    {
        // General fixes
        $replacements = self::REPLACEMENTS;

        if ( ! self::$options['spanish']) {
            $replacements = array_merge($replacements, self::SPANISH);
        }

        if (self::$options['hebrew']) {
            $replacements = array_merge($replacements, self::HEBREW);
        }

        return $replacements;
    }

    /**
     * Correct capitalization of initial names like JJ and TJ.
     *
     * @param string $name
     *
     * @return string
     */
    private static function correctInitialNames(string $name): string
    {
        return self::replaceCallback(self::INITIAL_NAME_REGEX, static function (array $matches): string {
            // $matches[0] is the initials plus the trailing whitespace; $matches[1] is the initials only.
            if (in_array($matches[1], self::INITIAL_NAME_EXCEPTIONS, true)) {
                return $matches[0];
            }

            return mb_strtoupper($matches[0]);
        }, $name);
    }

    /**
     * Correct lower-case words of titles.
     *
     * @param string $name
     *
     * @return string
     */
    private static function correctLowerCaseWords(string $name): string
    {
        foreach (self::LOWER_CASE_WORDS as $lowercase) {
            $name = self::replace('\b' . $lowercase . '\b', mb_strtolower($lowercase), $name);
        }

        return $name;
    }

    /**
     * Process options with given name
     *
     * Applies, in order, the roman-numeral, Spanish-conjunction and
     * post-nominal steps whose option is enabled.
     *
     * @param string $name
     *
     * @return string
     */
    private static function processOptions(string $name): string
    {
        if (self::$options['roman']) {
            $name = self::updateRoman($name);
        }

        if (self::$options['spanish']) {
            $name = self::fixConjunction($name);
        }

        if (self::$options['postnominal']) {
            $name = self::fixPostNominal($name);
        }

        return $name;
    }

    /**
     * Fix roman numeral names.
     *
     * Upper-cases every whole word matched by ROMAN_REGEX.
     *
     * @param string $name
     *
     * @return string
     */
    private static function updateRoman(string $name): string
    {
        return self::replaceCallback(self::ROMAN_REGEX, static function (array $matches): string {
            return mb_strtoupper($matches[0]);
        }, $name);
    }

    /**
     * Fix Spanish conjunctions.
     *
     * @param string $name
     *
     * @return string
     */
    private static function fixConjunction(string $name): string
    {
        foreach (self::CONJUNCTIONS as $conjunction) {
            $name = self::replace('\b' . $conjunction . '\b', mb_strtolower($conjunction), $name);
        }

        return $name;
    }

    /**
     * Fix post-nominal letter cases.
     *
     * Every post-nominal that is not excluded is matched case-insensitively as
     * a whole token and rewritten with its canonical capitalisation.
     *
     * @param string $name
     *
     * @return string
     */
    private static function fixPostNominal(string $name): string
    {
        $postNominals = array_diff(self::POST_NOMINALS, self::$postNominalsExcluded);

        foreach ($postNominals as $postNominal) {
            // Quote the value so that the "." in entries such as "FCSP." is literal.
            // "(?!\w)" equals "\b" after a word character, and unlike "\b" it also
            // lets entries that end in punctuation match.
            $pattern = '\b' . preg_quote($postNominal) . '(?!\w)';
            $name = self::replace($pattern, $postNominal, $name, 'ix');
        }

        return $name;
    }

    /**
     * mb_ereg_replace() wrapper that never returns a non-string.
     *
     * @param string      $pattern
     * @param string      $replacement
     * @param string      $subject
     * @param string|null $options Regex option string; null uses the engine default.
     *
     * @return string The replaced string, or $subject unchanged when the
     *                replacement fails.
     */
    private static function replace(string $pattern, string $replacement, string $subject, ?string $options = null): string
    {
        $result = $options === null
            ? mb_ereg_replace($pattern, $replacement, $subject)
            : mb_ereg_replace($pattern, $replacement, $subject, $options);

        return is_string($result) ? $result : $subject;
    }

    /**
     * mb_ereg_replace_callback() wrapper that never returns a non-string.
     *
     * @param string   $pattern
     * @param callable $callback Receives the match array, returns the replacement.
     * @param string   $subject
     *
     * @return string The replaced string, or $subject unchanged when the
     *                replacement fails.
     */
    private static function replaceCallback(string $pattern, callable $callback, string $subject): string
    {
        $result = mb_ereg_replace_callback($pattern, $callback, $subject);

        return is_string($result) ? $result : $subject;
    }
}
434