Passed
Push — master ( 9477b6...0dbc99 )
by Yuri
01:45
created

Formatter   A

Complexity

Total Complexity 39

Size/Duplication

Total Lines 430
Duplicated Lines 0 %

Test Coverage

Coverage 100%

Importance

Changes 19
Bugs 3 Features 0
Metric Value
eloc 183
c 19
b 3
f 0
dl 0
loc 430
ccs 91
cts 91
cp 1
rs 9.28
wmc 39

16 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 3 1
A setOptions() 0 3 1
A processOptions() 0 15 4
A excludePostNominals() 0 11 3
A nameCase() 0 30 5
A updateMac() 0 12 2
A capitalize() 0 14 1
A skipMixed() 0 6 3
A correctLowerCaseWords() 0 6 2
A correctInitialNames() 0 11 2
A canBeProcessed() 0 7 3
A fixConjunction() 0 6 2
A updateRoman() 0 5 1
A fixPostNominal() 0 7 2
A getReplacements() 0 13 3
A updateIrish() 0 12 4
1
<?php namespace Tamtamchik\NameCase;
2
3
/**
4
 * Class Formatter.
5
 */
6
class Formatter
7
{
8
    // Irish exceptions.
9
    private const EXCEPTIONS = [
10
        '\bMacEdo' => 'Macedo',
11
        '\bMacEvicius' => 'Macevicius',
12
        '\bMacHado' => 'Machado',
13
        '\bMacHar' => 'Machar',
14
        '\bMacHin' => 'Machin',
15
        '\bMacHlin' => 'Machlin',
16
        '\bMacIas' => 'Macias',
17
        '\bMacIulis' => 'Maciulis',
18
        '\bMacKie' => 'Mackie',
19
        '\bMacKle' => 'Mackle',
20
        '\bMacKlin' => 'Macklin',
21
        '\bMacKmin' => 'Mackmin',
22
        '\bMacQuarie' => 'Macquarie',
23
        '\bMacOmber' => 'Macomber',
24
        '\bMacIn' => 'Macin',
25
        '\bMacKintosh' => 'Mackintosh',
26
        '\bMacKen' => 'Macken',
27
        '\bMacHen' => 'Machen',
28
        '\bMacisaac' => 'MacIsaac',
29
        '\bMacHiel' => 'Machiel',
30
        '\bMacIol' => 'Maciol',
31
        '\bMacKell' => 'Mackell',
32
        '\bMacKlem' => 'Macklem',
33
        '\bMacKrell' => 'Mackrell',
34
        '\bMacLin' => 'Maclin',
35
        '\bMacKey' => 'Mackey',
36
        '\bMacKley' => 'Mackley',
37
        '\bMacHell' => 'Machell',
38
        '\bMacHon' => 'Machon',
39
    ];
40
41
    // General replacements: regex pattern => lower-case particle.
    // nameCase() applies these to the name after it has been capitalized.
42
    private const REPLACEMENTS = [
43
        '\bAl(?=\s+\w)' => 'al',        // al Arabic or forename Al.
44
        '\bAp\b' => 'ap',        // ap Welsh.
45
        '\b(Bin|Binti|Binte)\b' => 'bin',       // bin, binti, binte Arabic.
46
        '\bDell([ae])\b' => 'dell\1',    // della and delle Italian.
47
        '\bD([aeiou])\b' => 'd\1',       // da, de, di Italian; du French; do Brasil.
48
        '\bD([ao]s)\b' => 'd\1',       // das, dos Brasileiros.
49
        '\bDe([lrn])\b' => 'de\1',      // del Italian; der/den Dutch/Flemish.
50
        '\bL([eo])\b' => 'l\1',       // lo Italian; le French.
51
        '\bTe([rn])\b' => 'te\1',      // ten, ter Dutch/Flemish.
52
        '\bVan(?=\s+\w)' => 'van',       // van Dutch/Flemish or forename Van.
53
        '\bVon\b' => 'von',       // von German.
54
    ];
55
56
    private const SPANISH = [
57
        '\bEl\b' => 'el',        // el Greek or El Spanish.
58
        '\bLa\b' => 'la',        // la French or La Spanish.
59
    ];
60
61
    private const HEBREW = [
62
        '\bBen(?=\s+\w)' => 'ben', // ben Hebrew or forename Ben.
63
        '\bBat(?=\s+\w)' => 'bat', // bat Hebrew or forename Bat.
64
    ];
65
66
    // Spanish conjunctions.
67
    private const CONJUNCTIONS = ['Y', 'E', 'I'];
68
69
    // Roman letters regexp.
70
    private const ROMAN_REGEX = '\b((?:[Xx]{1,3}|[Xx][Ll]|[Ll][Xx]{0,3})?(?:[Ii]{1,3}|[Ii][VvXx]|[Vv][Ii]{0,3})?)\b';
71
72
    // Post nominal values.
73
    private const POST_NOMINALS = [
74
        'ACILEx', 'ACSM', 'ADC', 'AEPC', 'AFC', 'AFM', 'AICSM', 'AKC', 'AM', 'ARBRIBA', 'ARCS', 'ARRC', 'ARSM', 'AUH',
75
        'AUS',
76
        'BA', 'BArch', 'BCh', 'BChir', 'BCL', 'BDS', 'BEd', 'BEM', 'BEng', 'BM', 'BS', 'BSc', 'BSW', 'BVM&S',
77
        'BVScBVetMed',
78
        'CB', 'CBE', 'CEng', 'CertHE', 'CGC', 'CGM', 'CH', 'CIE', 'CMarEngCMarSci', 'CMarTech', 'CMG', 'CMILT',
79
        'CML', 'CPhT', 'CPLCTP', 'CPM', 'CQSW', 'CSciTeach', 'CSI', 'CTL', 'CVO',
80
        'DBE', 'DBEnv', 'DC', 'DCB', 'DCM', 'DCMG', 'DConstMgt', 'DCVO', 'DD', 'DEM', 'DFC', 'DFM', 'DIC', 'Dip',
81
        'DipHE', 'DipLP', 'DipSW', 'DL', 'DLitt', 'DLP', 'DPhil', 'DProf', 'DPT', 'DREst', 'DSC', 'DSM', 'DSO',
82
        'DSocSci',
83
        'ED', 'EdD', 'EJLog', 'EMLog', 'EN', 'EngD', 'EngTech', 'ERD', 'ESLog',
84
        'FADO', 'FAWM', 'FBDOFCOptom', 'FCEM', 'FCILEx', 'FCILT', 'FCSP.', 'FdAFdSc', 'FdEng', 'FFHOM', 'FFPM',
85
        'FRCAFFPMRCA', 'FRCGP', 'FRCOG', 'FRCP', 'FRCPsych', 'FRCS', 'FRCVS', 'FSCR.',
86
        'GBE', 'GC', 'GCB', 'GCIE', 'GCILEx', 'GCMG', 'GCSI', 'GCVO', 'GM',
87
        'HNC', 'HNCert', 'HND', 'HNDip',
88
        'ICTTech', 'IDSM', 'IEng', 'IMarEng', 'IOMCPM', 'ISO',
89
        'J', 'JP', 'JrLog',
90
        'KBE', 'KC', 'KCB', 'KCIE', 'KCMG', 'KCSI', 'KCVO', 'KG', 'KP', 'KT',
91
        'LFHOM', 'LG', 'LJ', 'LLB', 'LLD', 'LLM', 'Log', 'LPE', /* 'LT', - excluded, see initial names */
92
        'LVO',
93
        'MA', 'MAcc', 'MAnth', 'MArch', 'MarEngTech', 'MB', 'MBA', 'MBChB', 'MBE', 'MBEIOM', 'MBiochem', 'MC', 'MCEM',
94
        'MCGI', 'MCh.', 'MChem', 'MChiro', 'MClinRes', 'MComp', 'MCOptom', 'MCSM', 'MCSP', 'MD', 'MEarthSc',
95
        'MEng', 'MEnt', 'MEP', 'MFHOM', 'MFin', 'MFPM', 'MGeol', 'MILT', 'MJur', 'MLA', 'MLitt', 'MM', 'MMath',
96
        'MMathStat', 'MMORSE', 'MMus', 'MOst', 'MP', 'MPAMEd', 'MPharm', 'MPhil', 'MPhys', 'MRCGP', 'MRCOG',
97
        'MRCP', 'MRCPath', 'MRCPCHFRCPCH', 'MRCPsych', 'MRCS', 'MRCVS', 'MRes',
98
        /* 'MS', - excluded, see initial names */
99
        'MSc', 'MScChiro', 'MSci',
100
        'MSCR', 'MSM', 'MSocSc', 'MSP', 'MSt', 'MSW', 'MSYP', 'MVO',
101
        'NPQH',
102
        'OBE', 'OBI', 'OM', 'OND',
103
        'PgC', 'PGCAP', 'PGCE', 'PgCert', 'PGCHE', 'PgCLTHE', 'PgD', 'PGDE', 'PgDip', 'PhD', 'PLog', 'PLS',
104
        'QAM', 'QC', 'QFSM', 'QGM', 'QHC', 'QHDS', 'QHNS', 'QHP', 'QHS', 'QPM', 'QS', 'QTSCSci',
105
        'RD', 'RFHN', 'RGN', 'RHV', 'RIAI', 'RIAS', 'RM', 'RMN', 'RN', 'RN1RNA', 'RN2', 'RN3', 'RN4', 'RN5', 'RN6', 'RN7', 'RN8', 'RN9', 'RNC', 'RNLD', 'RNMH', 'ROH', 'RRC', 'RSAW', 'RSci', 'RSciTech', 'RSCN', 'RSN', 'RVM', 'RVN',
106
        'SCHM', 'SCJ', 'SCLD', 'SEN', 'SGM', 'SL', 'SPANSPMH', 'SPCC', 'SPCN', 'SPDN', 'SPHP', 'SPLD', 'SrLog', 'SRN', 'SROT',
107
        'TD',
108
        'UD',
109
        'V100', 'V200', 'V300', 'VC', 'VD', 'VetMB', 'VN', 'VRD'
110
    ];
111
112
    // Matches "Aj" or any two consonants followed by whitespace; such words are treated as initials.
113
    private const INITIAL_NAME_REGEX = '\b(Aj|[bcdfghjklmnpqrstvwxyzBCDFGHJKLMNPQRSTVWXYZ]{2})\s';
114
115
    // Two-letter abbreviations that match the initials pattern but must keep their normal capitalization.
116
    private const INITIAL_NAME_EXCEPTIONS = [
117
        'Mr',
118
        'Ms', // Replaces Member of the Senedd post nominal.
119
        'Dr',
120
        'St',
121
        'Jr',
122
        'Sr',
123
        'Lt', // Replaces Lady of the Order of the Thistle post nominal.
124
    ];
125
    private const LOWER_CASE_WORDS = ['The', 'Of', 'And'];
126
127
    // Post-nominals excluded from formatting (filled by excludePostNominals()).
128
    private static $postNominalsExcluded = [];
129
130
    // Default options.
131
    private static $options = [
132
        'lazy' => true,
133
        'irish' => true,
134
        'spanish' => false,
135
        'roman' => true,
136
        'hebrew' => true,
137
        'postnominal' => true,
138
    ];
139
140
    /**
     * Create a formatter and apply the given options.
     *
     * Options are kept in static (class-wide) state, so the values passed here
     * also affect later static calls, not only this instance.
     *
     * @param array $options Option overrides merged over the current values.
     */
    public function __construct(array $options = [])
    {
        static::setOptions($options);
    }
149
150
    /**
     * Global options setter.
     *
     * The supplied values are merged over the currently stored options; keys
     * that are not supplied keep their current value.
     *
     * @param array $options Option name => value pairs.
     */
    public static function setOptions(array $options): void
    {
        $merged = array_merge(self::$options, $options);

        self::$options = $merged;
    }
159
160
    /**
     * Global post-nominals exclusions setter.
     *
     * Adds one or more post-nominals to the class-wide exclusion list that
     * fixPostNominal() subtracts from POST_NOMINALS.
     *
     * @param array|string|null $values A single post-nominal or a list of them.
     *
     * @return bool True when the values were added, false when $values is
     *              neither a string nor an array (nothing is changed then).
     */
    public static function excludePostNominals($values)
    {
        // A single string is shorthand for a one-element list.
        if (is_string($values)) {
            $values = [$values];
        }

        if ( ! is_array($values)) {
            return false;
        }

        self::$postNominalsExcluded = array_merge(self::$postNominalsExcluded, $values);

        // Report success explicitly so both outcomes have a defined return value.
        return true;
    }
178
179
    /**
     * Main function for NameCase.
     *
     * Merges $options into the class-wide options (they stay in effect for
     * later calls) and returns the re-cased name. The input is returned
     * unchanged when it is empty, or when the "lazy" option is on and the
     * string is already mixed case.
     *
     * @param string|null $name    Name to format; null is treated as an empty string.
     * @param array|null  $options Option overrides; null means "no overrides".
     *
     * @return string
     */
    public static function nameCase(?string $name = '', ?array $options = []): string
    {
        $name = $name ?? '';

        // setOptions() only accepts arrays, while this signature also allows null.
        self::setOptions($options ?? []);

        // Do not do anything if string is mixed and lazy option is true.
        if ( ! self::canBeProcessed($name)) {
            return $name;
        }

        $original = $name;

        // Capitalize
        $name = self::capitalize($name);
        foreach (self::getReplacements() as $pattern => $replacement) {
            $name = mb_ereg_replace($pattern, $replacement, $name);

            // XXX: Very difficult to write a test in modern environments
            // @codeCoverageIgnoreStart
            if ( ! is_string($name)) {
                // Replacement failed: give the caller back the untouched input.
                return $original;
            }
            // @codeCoverageIgnoreEnd
        }

        $name = self::correctInitialNames($name);
        $name = self::correctLowerCaseWords($name);

        return self::processOptions($name);
    }
218
219
    /**
     * Decide whether the name should be re-cased at all.
     *
     * Empty strings are never processed. With the "lazy" option enabled,
     * strings that skipMixed() reports as mixed case are left alone as well.
     *
     * @param string $name
     *
     * @return bool
     */
    private static function canBeProcessed(string $name): bool
    {
        if ($name == '') {
            return false;
        }

        $skip = self::$options['lazy'] && self::skipMixed($name);

        return ! $skip;
    }
234
235
    /**
     * Skip if string is mixed case.
     *
     * A string counts as mixed case when its first letter is not lower case
     * and the whole string is neither entirely lower case nor entirely upper case.
     *
     * @param string $name
     *
     * @return bool True when the name is mixed case and should be skipped.
     */
    private static function skipMixed(string $name): bool
    {
        // Take the first character, not the first byte: $name[0] would cut a
        // multibyte letter such as "é" in half and make the case check meaningless.
        $firstLetter = mb_substr($name, 0, 1);

        $firstLetterLower = $firstLetter === mb_strtolower($firstLetter);
        $allLowerOrUpper = (mb_strtolower($name) === $name || mb_strtoupper($name) === $name);

        return ! ($firstLetterLower || $allLowerOrUpper);
    }
249
250
    /**
     * Lower-case the whole name, then upper-case the first letter of every word.
     *
     * A single letter that follows an apostrophe at the end of a word (as in
     * "'s") is put back to lower case, and the result is passed through
     * updateIrish().
     *
     * @param string $name
     *
     * @return string
     */
    private static function capitalize(string $name): string
    {
        $toUpper = static function ($matches) {
            return mb_strtoupper($matches[0]);
        };
        $toLower = static function ($matches) {
            return mb_strtolower($matches[0]);
        };

        $lowered = mb_strtolower($name);
        $capitalized = mb_ereg_replace_callback('\b\w', $toUpper, $lowered);

        // Lowercase 's
        $fixed = mb_ereg_replace_callback('\'\w\b', $toLower, $capitalized);

        return self::updateIrish($fixed);
    }
272
273
    /**
     * Apply the Mac/Mc handling when the "irish" option is enabled.
     *
     * updateMac() is only run when the name contains a "Mc" word or a "Mac"
     * word matching the pattern below; "Macmurdo" is always rewritten to
     * "MacMurdo" afterwards.
     *
     * @param string $name
     *
     * @return string
     */
    private static function updateIrish(string $name): string
    {
        if ( ! self::$options['irish']) {
            return $name;
        }

        // mb_ereg_match() anchors at the start of the string, hence the leading ".*?".
        $hasMac = mb_ereg_match('.*?\bMac[A-Za-z]{2,}[^aciozj]\b', $name);
        $needsUpdate = $hasMac || mb_ereg_match('.*?\bMc', $name);

        if ($needsUpdate) {
            $name = self::updateMac($name);
        }

        return mb_ereg_replace('Macmurdo', 'MacMurdo', $name);
    }
293
294
    /**
     * Upper-case the letter following a "Mac"/"Mc" prefix, then undo that for
     * the known exceptions listed in EXCEPTIONS.
     *
     * @param string $name
     *
     * @return string
     */
    private static function updateMac(string $name): string
    {
        $upperAfterPrefix = static function ($matches) {
            $prefix = $matches[1];
            $rest = $matches[2];

            return $prefix . mb_strtoupper(mb_substr($rest, 0, 1)) . mb_substr($rest, 1);
        };

        $result = mb_ereg_replace_callback('\b(Ma?c)([A-Za-z]+)', $upperAfterPrefix, $name);

        // Now fix "Mac" exceptions
        foreach (self::EXCEPTIONS as $exceptionPattern => $fixed) {
            $result = mb_ereg_replace($exceptionPattern, $fixed, $result);
        }

        return $result;
    }
314
315
    /**
     * Build the pattern => replacement map for the current options.
     *
     * The general replacements are always included. The SPANISH set is added
     * when the "spanish" option is off, and the HEBREW set when the "hebrew"
     * option is on.
     *
     * @return array
     */
    private static function getReplacements(): array
    {
        // General fixes
        $groups = [self::REPLACEMENTS];

        if ( ! self::$options['spanish']) {
            $groups[] = self::SPANISH;
        }

        if (self::$options['hebrew']) {
            $groups[] = self::HEBREW;
        }

        return array_merge(...$groups);
    }
334
335
    /**
     * Upper-case two-letter initials such as JJ and TJ.
     *
     * Words matched by INITIAL_NAME_REGEX are upper-cased unless they appear
     * in INITIAL_NAME_EXCEPTIONS, in which case they are left as they are.
     *
     * @param string $name
     *
     * @return string
     */
    private static function correctInitialNames(string $name): string
    {
        $fixInitials = static function ($matches) {
            $whole = $matches[0];
            $isException = in_array($matches[1], self::INITIAL_NAME_EXCEPTIONS);

            return $isException ? $whole : mb_strtoupper($whole);
        };

        return mb_ereg_replace_callback(self::INITIAL_NAME_REGEX, $fixInitials, $name);
    }
354
355
    /**
     * Correct lower-case words of titles.
     *
     * Every whole-word occurrence of an entry in LOWER_CASE_WORDS is replaced
     * by its lower-case form.
     *
     * @param string $name
     *
     * @return string
     */
    private static function correctLowerCaseWords(string $name): string
    {
        foreach (self::LOWER_CASE_WORDS as $lowercase) {
            $result = mb_ereg_replace('\b' . $lowercase . '\b', mb_strtolower($lowercase), $name);

            // mb_ereg_replace() returns false/null on failure; keep the last good
            // value so the declared string return type is always honoured.
            if (is_string($result)) {
                $name = $result;
            }
        }

        return $name;
    }
369
370
    /**
     * Run the optional post-processing steps that are switched on.
     *
     * The steps run in a fixed order: roman numerals, Spanish conjunctions,
     * post-nominals. Each one is gated by the option of the same name.
     *
     * @param string $name
     *
     * @return string
     */
    private static function processOptions(string $name): string
    {
        // Option key => private static method implementing that step.
        $steps = [
            'roman' => 'updateRoman',
            'spanish' => 'fixConjunction',
            'postnominal' => 'fixPostNominal',
        ];

        foreach ($steps as $option => $method) {
            if (self::$options[$option]) {
                $name = self::$method($name);
            }
        }

        return $name;
    }
393
394
    /**
     * Upper-case every whole word matched by ROMAN_REGEX.
     *
     * @param string $name
     *
     * @return string
     */
    private static function updateRoman(string $name): string
    {
        $toUpper = static function ($matches) {
            return mb_strtoupper($matches[0]);
        };

        return mb_ereg_replace_callback(self::ROMAN_REGEX, $toUpper, $name);
    }
407
408
    /**
     * Fix Spanish conjunctions.
     *
     * Every whole-word occurrence of an entry in CONJUNCTIONS is replaced by
     * its lower-case form.
     *
     * @param string $name
     *
     * @return string
     */
    private static function fixConjunction(string $name): string
    {
        foreach (self::CONJUNCTIONS as $conjunction) {
            $result = mb_ereg_replace('\b' . $conjunction . '\b', mb_strtolower($conjunction), $name);

            // mb_ereg_replace() returns false/null on failure; keep the last good
            // value so the declared string return type is always honoured.
            if (is_string($result)) {
                $name = $result;
            }
        }

        return $name;
    }
422
423
    /**
     * Fix post-nominal letter cases.
     *
     * Every entry of POST_NOMINALS that has not been excluded through
     * excludePostNominals() is matched case-insensitively as a whole word and
     * rewritten with the exact casing from the list.
     *
     * @param string $name
     *
     * @return string
     */
    private static function fixPostNominal(string $name): string
    {
        $postNominals = array_diff(self::POST_NOMINALS, self::$postNominalsExcluded);

        foreach ($postNominals as $postNominal) {
            // Entries such as "MCh." contain regex metacharacters, so they must be
            // escaped to be matched literally. The trailing boundary is written as
            // (?!\w) because \b can never match between a literal "." and a
            // following space or the end of the string.
            $pattern = '\b' . preg_quote($postNominal) . '(?!\w)';

            $result = mb_ereg_replace($pattern, $postNominal, $name, 'ix');

            // mb_ereg_replace() returns false/null on failure; keep the last good
            // value so the declared string return type is always honoured.
            if (is_string($result)) {
                $name = $result;
            }
        }

        return $name;
    }
437
}
438