Passed
Pull Request — master (#19)
by
unknown
06:50
created

Formatter::correctInitialNames()   A

Complexity

Conditions 2
Paths 1

Size

Total Lines 11
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 6
CRAP Score 2

Importance

Changes 0
Metric Value
cc 2
eloc 6
nc 1
nop 1
dl 0
loc 11
ccs 6
cts 6
cp 1
crap 2
rs 10
c 0
b 0
f 0
1
<?php namespace Tamtamchik\NameCase;
2
3
/**
4
 * Class Formatter.
5
 */
6
class Formatter
{
    // Surnames that merely start with "Mac" and must not get an inner capital.
    // Applied by updateMac() after it has upper-cased the letter following "Mac"/"Mc".
    private const EXCEPTIONS = [
        '\bMacEdo'     => 'Macedo',
        '\bMacEvicius' => 'Macevicius',
        '\bMacHado'    => 'Machado',
        '\bMacHar'     => 'Machar',
        '\bMacHin'     => 'Machin',
        '\bMacHlin'    => 'Machlin',
        '\bMacIas'     => 'Macias',
        '\bMacIulis'   => 'Maciulis',
        '\bMacKie'     => 'Mackie',
        '\bMacKle'     => 'Mackle',
        '\bMacKlin'    => 'Macklin',
        '\bMacKmin'    => 'Mackmin',
        '\bMacQuarie'  => 'Macquarie',
        '\bMacOmber'   => 'Macomber',
        '\bMacIn'      => 'Macin',
        '\bMacKintosh' => 'Mackintosh',
        '\bMacKen'     => 'Macken',
        '\bMacHen'     => 'Machen',
        '\bMacisaac'   => 'MacIsaac',
        '\bMacHiel'    => 'Machiel',
        '\bMacIol'     => 'Maciol',
        '\bMacKell'    => 'Mackell',
        '\bMacKlem'    => 'Macklem',
        '\bMacKrell'   => 'Mackrell',
        '\bMacLin'     => 'Maclin',
        '\bMacKey'     => 'Mackey',
        '\bMacKley'    => 'Mackley',
        '\bMacHell'    => 'Machell',
        '\bMacHon'     => 'Machon',
    ];

    // General replacements (regex pattern => replacement), always applied.
    private const REPLACEMENTS = [
        '\bAl(?=\s+\w)'         => 'al',        // al Arabic or forename Al.
        '\bAp\b'                => 'ap',        // ap Welsh.
        '\b(Bin|Binti|Binte)\b' => 'bin',       // bin, binti, binte Arabic.
        '\bDell([ae])\b'        => 'dell\1',    // della and delle Italian.
        '\bD([aeiou])\b'        => 'd\1',       // da, de, di Italian; du French; do Brasil.
        '\bD([ao]s)\b'          => 'd\1',       // das, dos Brasileiros.
        '\bDe([lrn])\b'         => 'de\1',      // del Italian; der/den Dutch/Flemish.
        '\bL([eo])\b'           => 'l\1',       // lo Italian; le French.
        '\bTe([rn])\b'          => 'te\1',      // ten, ter Dutch/Flemish.
        '\bVan(?=\s+\w)'        => 'van',       // van Dutch/Flemish or forename Van.
        '\bVon\b'               => 'von',       // von German.
    ];

    // Lower-casing of "El"/"La"; only applied while the 'spanish' option is OFF
    // (see getReplacements()).
    private const SPANISH = [
        '\bEl\b' => 'el',        // el Greek or El Spanish.
        '\bLa\b' => 'la',        // la French or La Spanish.
    ];

    // Applied only while the 'hebrew' option is ON.
    private const HEBREW = [
        '\bBen(?=\s+\w)' => 'ben', // ben Hebrew or forename Ben.
        '\bBat(?=\s+\w)' => 'bat', // bat Hebrew or forename Bat.
    ];

    // Spanish conjunctions, lower-cased when the 'spanish' option is ON.
    private const CONJUNCTIONS = ['Y', 'E', 'I'];

    // Roman numerals regexp; every match is upper-cased by updateRoman().
    private const ROMAN_REGEX = '\b((?:[Xx]{1,3}|[Xx][Ll]|[Ll][Xx]{0,3})?(?:[Ii]{1,3}|[Ii][VvXx]|[Vv][Ii]{0,3})?)\b';

    // Post-nominal letters, restored to exactly this casing by fixPostNominal().
    private const POST_NOMINALS = [
        'ACILEx', 'ACSM', 'ADC', 'AEPC', 'AFC', 'AFM', 'AICSM', 'AKC', 'AM', 'ARBRIBA', 'ARCS', 'ARRC', 'ARSM', 'AUH', 'AUS',
        'BA', 'BArch', 'BCh', 'BChir', 'BCL', 'BDS', 'BEd', 'BEM', 'BEng', 'BM', 'BS', 'BSc', 'BSW', 'BVM&S', 'BVScBVetMed',
        'CB', 'CBE', 'CEng', 'CertHE', 'CGC', 'CGM', 'CH', 'CIE', 'CMarEngCMarSci', 'CMarTech', 'CMG', 'CMILT', 'CML', 'CPhT', 'CPLCTP', 'CPM', 'CQSW', 'CSciTeach', 'CSI', 'CTL', 'CVO',
        'DBE', 'DBEnv', 'DC', 'DCB', 'DCM', 'DCMG', 'DConstMgt', 'DCVO', 'DD', 'DEM', 'DFC', 'DFM', 'DIC', 'Dip', 'DipHE', 'DipLP', 'DipSW', 'DL', 'DLitt', 'DLP', 'DPhil', 'DProf', 'DPT', 'DREst', 'DSC', 'DSM', 'DSO', 'DSocSci',
        'ED', 'EdD', 'EJLog', 'EMLog', 'EN', 'EngD', 'EngTech', 'ERD', 'ESLog',
        'FADO', 'FAWM', 'FBDOFCOptom', 'FCEM', 'FCILEx', 'FCILT', 'FCSP.', 'FdAFdSc', 'FdEng', 'FFHOM', 'FFPM', 'FRCAFFPMRCA', 'FRCGP', 'FRCOG', 'FRCP', 'FRCPsych', 'FRCS', 'FRCVS', 'FSCR.',
        'GBE', 'GC', 'GCB', 'GCIE', 'GCILEx', 'GCMG', 'GCSI', 'GCVO', 'GM',
        'HNC', 'HNCert', 'HND', 'HNDip',
        'ICTTech', 'IDSM', 'IEng', 'IMarEng', 'IOMCPM', 'ISO',
        'J', 'JP', 'JrLog',
        'KBE', 'KC', 'KCB', 'KCIE', 'KCMG', 'KCSI', 'KCVO', 'KG', 'KP', 'KT',
        'LFHOM', 'LG', 'LJ', 'LLB', 'LLD', 'LLM', 'Log', 'LPE', 'LT', 'LVO',
        'MA', 'MAcc', 'MAnth', 'MArch', 'MarEngTech', 'MB', 'MBA', 'MBChB', 'MBE', 'MBEIOM', 'MBiochem', 'MC', 'MCEM', 'MCGI', 'MCh.', 'MChem', 'MChiro', 'MClinRes', 'MComp', 'MCOptom', 'MCSM', 'MCSP', 'MD', 'MEarthSc', 'MEng', 'MEnt', 'MEP', 'MFHOM', 'MFin', 'MFPM', 'MGeol', 'MILT', 'MJur', 'MLA', 'MLitt', 'MM', 'MMath', 'MMathStat', 'MMORSE', 'MMus', 'MOst', 'MP', 'MPAMEd', 'MPharm', 'MPhil', 'MPhys', 'MRCGP', 'MRCOG', 'MRCP', 'MRCPath', 'MRCPCHFRCPCH', 'MRCPsych', 'MRCS', 'MRCVS', 'MRes', 'MS', 'MSc', 'MScChiro', 'MSci', 'MSCR', 'MSM', 'MSocSc', 'MSP', 'MSt', 'MSW', 'MSYP', 'MVO',
        'NPQH',
        'OBE', 'OBI', 'OM', 'OND',
        'PgC', 'PGCAP', 'PGCE', 'PgCert', 'PGCHE', 'PgCLTHE', 'PgD', 'PGDE', 'PgDip', 'PhD', 'PLog', 'PLS',
        'QAM', 'QC', 'QFSM', 'QGM', 'QHC', 'QHDS', 'QHNS', 'QHP', 'QHS', 'QPM', 'QS', 'QTSCSci',
        'RD', 'RFHN', 'RGN', 'RHV', 'RIAI', 'RIAS', 'RM', 'RMN', 'RN', 'RN1RNA', 'RN2', 'RN3', 'RN4', 'RN5', 'RN6', 'RN7', 'RN8', 'RN9', 'RNC', 'RNLD', 'RNMH', 'ROH', 'RRC', 'RSAW', 'RSci', 'RSciTech', 'RSCN', 'RSN', 'RVM', 'RVN',
        'SCHM', 'SCJ', 'SCLD', 'SEN', 'SGM', 'SL', 'SPANSPMH', 'SPCC', 'SPCN', 'SPDN', 'SPHP', 'SPLD', 'SrLog', 'SRN', 'SROT',
        'TD',
        'UD',
        'V100', 'V200', 'V300', 'VC', 'VD', 'VetMB', 'VN', 'VRD'
    ];

    // Post-nominals the caller asked to leave alone (see excludePostNominals()).
    private static $postNominalsExcluded = [];

    // Most two-letter words with no vowels should be kept in all caps as initials.
    // The trailing \s means only a pair followed by whitespace is considered.
    private const INITIAL_NAME_REGEX = '\b(Aj|[bcdfghjklmnpqrstvwxyzBCDFGHJKLMNPQRSTVWXYZ]{2})\s';
    private const INITIAL_NAME_EXCEPTIONS = [
        'Mr',
        'Dr',
        'St',
        'Jr',
        'Sr',
        // FIXME: These collide with POST_NOMINALS
        // 'Ms',
        // 'Lt',
    ];

    // Words that are lower-cased wherever they appear as whole words.
    private const LOWER_CASE_WORDS = ['The', 'Of', 'And'];

    // Default options. The array is static: every setOptions()/nameCase() call
    // merges into it, so overrides stay in effect for later calls.
    private static $options = [
        'lazy'        => true,
        'irish'       => true,
        'spanish'     => false,
        'roman'       => true,
        'hebrew'      => true,
        'postnominal' => true,
    ];

    /**
     * Formatter constructor.
     *
     * Merges the given options into the shared static option set.
     *
     * @param array $options
     */
    public function __construct($options = [])
    {
        $this->setOptions($options);
    }

    /**
     * Global options setter.
     *
     * Given keys override the current values; keys not given keep their value.
     *
     * @param array $options
     */
    public static function setOptions($options): void
    {
        self::$options = array_merge(self::$options, $options);
    }

    /**
     * Global post-nominals exclusions setter.
     *
     * A single string is treated as a one-element list. Exclusions accumulate
     * across calls.
     *
     * @param array|string $values
     * @return boolean|void false when $values is neither a string nor an array, nothing otherwise
     */
    public static function excludePostNominals($values)
    {
        if (is_string($values)) {
            $values = [$values];
        }

        if (!is_array($values)) {
            return false;
        }

        self::$postNominalsExcluded = array_merge(self::$postNominalsExcluded, $values);
    }

    /**
     * Main function for NameCase.
     *
     * Note: $options are merged into the shared static options and therefore
     * remain active for subsequent calls.
     *
     * @param string $name
     * @param array $options
     *
     * @return string
     */
    public static function nameCase($name = '', array $options = []): string
    {
        // Loose comparison is kept so every "empty" input short-circuits as before;
        // the cast makes a null input return '' instead of violating the return type.
        if ($name == '') {
            return (string) $name;
        }

        self::setOptions($options);

        // Do not do anything if string is mixed and lazy option is true.
        if (self::$options['lazy'] && self::skipMixed($name)) {
            return $name;
        }

        // Capitalize
        $name = self::capitalize($name);

        foreach (self::getReplacements() as $pattern => $replacement) {
            $name = self::replace($pattern, $replacement, $name);
        }

        $name = self::correctInitialNames($name);
        $name = self::correctLowerCaseWords($name);

        return self::processOptions($name);
    }

    /**
     * Apply the option-dependent passes (roman numerals, Spanish conjunctions,
     * post-nominals) to the given name, in that order.
     *
     * @param string $name
     *
     * @return string
     */
    private static function processOptions(string $name): string
    {
        if (self::$options['roman']) {
            $name = self::updateRoman($name);
        }

        if (self::$options['spanish']) {
            $name = self::fixConjunction($name);
        }

        if (self::$options['postnominal']) {
            $name = self::fixPostNominal($name);
        }

        return $name;
    }

    /**
     * Lower-case everything, then upper-case the first letter of every word.
     *
     * @param string $name
     *
     * @return string
     */
    private static function capitalize(string $name): string
    {
        $name = mb_strtolower($name);

        $name = self::replaceCallback('\b\w', function ($matches) {
            return mb_strtoupper($matches[0]);
        }, $name);

        // Lowercase 's
        $name = self::replaceCallback('\'\w\b', function ($matches) {
            return mb_strtolower($matches[0]);
        }, $name);

        return self::updateIrish($name);
    }

    /**
     * Define required replacements.
     *
     * @return array regex pattern => replacement
     */
    private static function getReplacements(): array
    {
        // General fixes
        $replacements = self::REPLACEMENTS;

        if (!self::$options['spanish']) {
            $replacements = array_merge($replacements, self::SPANISH);
        }

        if (self::$options['hebrew']) {
            $replacements = array_merge($replacements, self::HEBREW);
        }

        return $replacements;
    }

    /**
     * Skip if string is mixed case.
     *
     * A name is "mixed" when its first letter is not lower case and the whole
     * string is neither entirely lower case nor entirely upper case.
     *
     * @param string $name
     *
     * @return bool
     */
    private static function skipMixed(string $name): bool
    {
        // mb_substr() rather than $name[0]: byte indexing would cut a multibyte
        // first letter (e.g. "é") in half and misclassify the name.
        $firstLetter = mb_substr($name, 0, 1);
        $firstLetterLower = $firstLetter === mb_strtolower($firstLetter);
        $allLowerOrUpper = (mb_strtolower($name) === $name || mb_strtoupper($name) === $name);

        return !($firstLetterLower || $allLowerOrUpper);
    }

    /**
     * Update for Irish names.
     *
     * @param string $name
     *
     * @return string
     */
    private static function updateIrish(string $name): string
    {
        if (!self::$options['irish']) {
            return $name;
        }

        // mb_ereg_match() anchors at the start of the string, hence the leading ".*?".
        if (
            mb_ereg_match('.*?\bMac[A-Za-z]{2,}[^aciozj]\b', $name) ||
            mb_ereg_match('.*?\bMc', $name)
        ) {
            $name = self::updateMac($name);
        }

        return self::replace('Macmurdo', 'MacMurdo', $name);
    }

    /**
     * Fix roman numeral names.
     *
     * @param string $name
     *
     * @return string
     */
    private static function updateRoman(string $name): string
    {
        return self::replaceCallback(self::ROMAN_REGEX, function ($matches) {
            return mb_strtoupper($matches[0]);
        }, $name);
    }

    /**
     * Updates irish Mac & Mc.
     *
     * Upper-cases the letter after "Mac"/"Mc", then undoes that for the names
     * listed in EXCEPTIONS.
     *
     * @param string $name
     *
     * @return string
     */
    private static function updateMac(string $name): string
    {
        $name = self::replaceCallback('\b(Ma?c)([A-Za-z]+)', function ($matches) {
            return $matches[1] . mb_strtoupper(mb_substr($matches[2], 0, 1)) . mb_substr($matches[2], 1);
        }, $name);

        // Now fix "Mac" exceptions
        foreach (self::EXCEPTIONS as $pattern => $replacement) {
            $name = self::replace($pattern, $replacement, $name);
        }

        return $name;
    }

    /**
     * Fix Spanish conjunctions.
     *
     * @param string $name
     *
     * @return string
     */
    private static function fixConjunction(string $name): string
    {
        foreach (self::CONJUNCTIONS as $conjunction) {
            $name = self::replace('\b' . $conjunction . '\b', mb_strtolower($conjunction), $name);
        }

        return $name;
    }

    /**
     * Correct capitalization of initial names like JJ and TJ.
     *
     * @param string $name
     *
     * @return string
     */
    private static function correctInitialNames(string $name): string
    {
        return self::replaceCallback(self::INITIAL_NAME_REGEX, function ($matches) {
            // $matches[0] includes the trailing whitespace, $matches[1] only the letters.
            if (in_array($matches[1], self::INITIAL_NAME_EXCEPTIONS, true)) {
                return $matches[0];
            }

            return mb_strtoupper($matches[0]);
        }, $name);
    }

    /**
     * Correct lower-case words of titles.
     *
     * @param string $name
     *
     * @return string
     */
    private static function correctLowerCaseWords(string $name): string
    {
        foreach (self::LOWER_CASE_WORDS as $lowercase) {
            $name = self::replace('\b' . $lowercase . '\b', mb_strtolower($lowercase), $name);
        }

        return $name;
    }

    /**
     * Fix post-nominal letter cases.
     *
     * Each non-excluded post-nominal is matched case-insensitively as a whole
     * token and rewritten with its canonical casing.
     *
     * @param string $name
     * @return string
     */
    private static function fixPostNominal(string $name): string
    {
        $postNominals = array_diff(self::POST_NOMINALS, self::$postNominalsExcluded);

        foreach ($postNominals as $postNominal) {
            // Quote the entry so the "." in e.g. "MCh." is literal rather than "any
            // character". The (?!\w) lookahead equals \b after a word character but,
            // unlike \b, also holds after a trailing "." at the end of the string.
            $pattern = '\b' . preg_quote($postNominal) . '(?!\w)';
            $name = self::replace($pattern, $postNominal, $name, 'ix');
        }

        return $name;
    }

    /**
     * mb_ereg_replace() wrapper that always yields a string.
     *
     * mb_ereg_replace() returns false or null on failure; in that case the
     * subject is returned unchanged instead of leaking a non-string value.
     *
     * @param string $pattern
     * @param string $replacement
     * @param string $subject
     * @param string $options mb_ereg option letters; '' keeps the function's own default
     *
     * @return string
     */
    private static function replace(string $pattern, string $replacement, string $subject, string $options = ''): string
    {
        $result = $options === ''
            ? mb_ereg_replace($pattern, $replacement, $subject)
            : mb_ereg_replace($pattern, $replacement, $subject, $options);

        return is_string($result) ? $result : $subject;
    }

    /**
     * mb_ereg_replace_callback() wrapper that always yields a string.
     *
     * Returns the subject unchanged when the underlying call fails.
     *
     * @param string   $pattern
     * @param callable $callback receives the match array, returns the replacement
     * @param string   $subject
     *
     * @return string
     */
    private static function replaceCallback(string $pattern, callable $callback, string $subject): string
    {
        $result = mb_ereg_replace_callback($pattern, $callback, $subject);

        return is_string($result) ? $result : $subject;
    }
}
397