Passed
Push — master ( 583adc...f928db )
by Yuri
01:51
created

Formatter::canBeProcessed()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 7
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 4
CRAP Score 3

Importance

Changes 1
Bugs 1 Features 0
Metric Value
cc 3
eloc 3
c 1
b 1
f 0
nc 3
nop 1
dl 0
loc 7
ccs 4
cts 4
cp 1
crap 3
rs 10
1
<?php namespace Tamtamchik\NameCase;
2
3
/**
4
 * Class Formatter.
5
 */
6
class Formatter
7
{
8
    // Irish exceptions.
9
    private const EXCEPTIONS = [
10
        '\bMacEdo' => 'Macedo',
11
        '\bMacEvicius' => 'Macevicius',
12
        '\bMacHado' => 'Machado',
13
        '\bMacHar' => 'Machar',
14
        '\bMacHin' => 'Machin',
15
        '\bMacHlin' => 'Machlin',
16
        '\bMacIas' => 'Macias',
17
        '\bMacIulis' => 'Maciulis',
18
        '\bMacKie' => 'Mackie',
19
        '\bMacKle' => 'Mackle',
20
        '\bMacKlin' => 'Macklin',
21
        '\bMacKmin' => 'Mackmin',
22
        '\bMacQuarie' => 'Macquarie',
23
        '\bMacOmber' => 'Macomber',
24
        '\bMacIn' => 'Macin',
25
        '\bMacKintosh' => 'Mackintosh',
26
        '\bMacKen' => 'Macken',
27
        '\bMacHen' => 'Machen',
28
        '\bMacisaac' => 'MacIsaac',
29
        '\bMacHiel' => 'Machiel',
30
        '\bMacIol' => 'Maciol',
31
        '\bMacKell' => 'Mackell',
32
        '\bMacKlem' => 'Macklem',
33
        '\bMacKrell' => 'Mackrell',
34
        '\bMacLin' => 'Maclin',
35
        '\bMacKey' => 'Mackey',
36
        '\bMacKley' => 'Mackley',
37
        '\bMacHell' => 'Machell',
38
        '\bMacHon' => 'Machon',
39
    ];
40
41
    // General replacements.
42
    private const REPLACEMENTS = [
43
        '\bAl(?=\s+\w)' => 'al',        // al Arabic or forename Al.
44
        '\bAp\b' => 'ap',        // ap Welsh.
45
        '\b(Bin|Binti|Binte)\b' => 'bin',       // bin, binti, binte Arabic.
46
        '\bDell([ae])\b' => 'dell\1',    // della and delle Italian.
47
        '\bD([aeiou])\b' => 'd\1',       // da, de, di Italian; du French; do Brasil.
48
        '\bD([ao]s)\b' => 'd\1',       // das, dos Brasileiros.
49
        '\bDe([lrn])\b' => 'de\1',      // del Italian; der/den Dutch/Flemish.
50
        '\bL([eo])\b' => 'l\1',       // lo Italian; le French.
51
        '\bTe([rn])\b' => 'te\1',      // ten, ter Dutch/Flemish.
52
        '\bVan(?=\s+\w)' => 'van',       // van German or forename Van.
53
        '\bVon\b' => 'von',       // von Dutch/Flemish.
54
    ];
55
56
    private const SPANISH = [
57
        '\bEl\b' => 'el',        // el Greek or El Spanish.
58
        '\bLa\b' => 'la',        // la French or La Spanish.
59
    ];
60
61
    private const HEBREW = [
62
        '\bBen(?=\s+\w)' => 'ben', // ben Hebrew or forename Ben.
63
        '\bBat(?=\s+\w)' => 'bat', // bat Hebrew or forename Bat.
64
    ];
65
66
    // Spanish conjunctions.
67
    private const CONJUNCTIONS = ['Y', 'E', 'I'];
68
69
    // Roman letters regexp.
70
    private const ROMAN_REGEX = '\b((?:[Xx]{1,3}|[Xx][Ll]|[Ll][Xx]{0,3})?(?:[Ii]{1,3}|[Ii][VvXx]|[Vv][Ii]{0,3})?)\b';
71
72
    // Post nominal values.
73
    private const POST_NOMINALS = [
74
        'ACILEx', 'ACSM', 'ADC', 'AEPC', 'AFC', 'AFM', 'AICSM', 'AKC', 'AM', 'ARBRIBA', 'ARCS', 'ARRC', 'ARSM', 'AUH',
75
        'AUS',
76
        'BA', 'BArch', 'BCh', 'BChir', 'BCL', 'BDS', 'BEd', 'BEM', 'BEng', 'BM', 'BS', 'BSc', 'BSW', 'BVM&S',
77
        'BVScBVetMed',
78
        'CB', 'CBE', 'CEng', 'CertHE', 'CGC', 'CGM', 'CH', 'CIE', 'CMarEngCMarSci', 'CMarTech', 'CMG', 'CMILT',
79
        'CML', 'CPhT', 'CPLCTP', 'CPM', 'CQSW', 'CSciTeach', 'CSI', 'CTL', 'CVO',
80
        'DBE', 'DBEnv', 'DC', 'DCB', 'DCM', 'DCMG', 'DConstMgt', 'DCVO', 'DD', 'DEM', 'DFC', 'DFM', 'DIC', 'Dip',
81
        'DipHE', 'DipLP', 'DipSW', 'DL', 'DLitt', 'DLP', 'DPhil', 'DProf', 'DPT', 'DREst', 'DSC', 'DSM', 'DSO',
82
        'DSocSci',
83
        'ED', 'EdD', 'EJLog', 'EMLog', 'EN', 'EngD', 'EngTech', 'ERD', 'ESLog',
84
        'FADO', 'FAWM', 'FBDOFCOptom', 'FCEM', 'FCILEx', 'FCILT', 'FCSP.', 'FdAFdSc', 'FdEng', 'FFHOM', 'FFPM',
85
        'FRCAFFPMRCA', 'FRCGP', 'FRCOG', 'FRCP', 'FRCPsych', 'FRCS', 'FRCVS', 'FSCR.',
86
        'GBE', 'GC', 'GCB', 'GCIE', 'GCILEx', 'GCMG', 'GCSI', 'GCVO', 'GM',
87
        'HNC', 'HNCert', 'HND', 'HNDip',
88
        'ICTTech', 'IDSM', 'IEng', 'IMarEng', 'IOMCPM', 'ISO',
89
        'J', 'JP', 'JrLog',
90
        'KBE', 'KC', 'KCB', 'KCIE', 'KCMG', 'KCSI', 'KCVO', 'KG', 'KP', 'KT',
91
        'LFHOM', 'LG', 'LJ', 'LLB', 'LLD', 'LLM', 'Log', 'LPE', /* 'LT', - excluded, see initial names */
92
        'LVO',
93
        'MA', 'MAcc', 'MAnth', 'MArch', 'MarEngTech', 'MB', 'MBA', 'MBChB', 'MBE', 'MBEIOM', 'MBiochem', 'MC', 'MCEM',
94
        'MCGI', 'MCh.', 'MChem', 'MChiro', 'MClinRes', 'MComp', 'MCOptom', 'MCSM', 'MCSP', 'MD', 'MEarthSc',
95
        'MEng', 'MEnt', 'MEP', 'MFHOM', 'MFin', 'MFPM', 'MGeol', 'MILT', 'MJur', 'MLA', 'MLitt', 'MM', 'MMath',
96
        'MMathStat', 'MMORSE', 'MMus', 'MOst', 'MP', 'MPAMEd', 'MPharm', 'MPhil', 'MPhys', 'MRCGP', 'MRCOG',
97
        'MRCP', 'MRCPath', 'MRCPCHFRCPCH', 'MRCPsych', 'MRCS', 'MRCVS', 'MRes',
98
        /* 'MS', - excluded, see initial names */
99
        'MSc', 'MScChiro', 'MSci',
100
        'MSCR', 'MSM', 'MSocSc', 'MSP', 'MSt', 'MSW', 'MSYP', 'MVO',
101
        'NPQH',
102
        'OBE', 'OBI', 'OM', 'OND',
103
        'PgC', 'PGCAP', 'PGCE', 'PgCert', 'PGCHE', 'PgCLTHE', 'PgD', 'PGDE', 'PgDip', 'PhD', 'PLog', 'PLS',
104
        'QAM', 'QC', 'QFSM', 'QGM', 'QHC', 'QHDS', 'QHNS', 'QHP', 'QHS', 'QPM', 'QS', 'QTSCSci',
105
        'RD', 'RFHN', 'RGN', 'RHV', 'RIAI', 'RIAS', 'RM', 'RMN', 'RN', 'RN1RNA', 'RN2', 'RN3', 'RN4', 'RN5', 'RN6', 'RN7', 'RN8', 'RN9', 'RNC', 'RNLD', 'RNMH', 'ROH', 'RRC', 'RSAW', 'RSci', 'RSciTech', 'RSCN', 'RSN', 'RVM', 'RVN',
106
        'SCHM', 'SCJ', 'SCLD', 'SEN', 'SGM', 'SL', 'SPANSPMH', 'SPCC', 'SPCN', 'SPDN', 'SPHP', 'SPLD', 'SrLog', 'SRN', 'SROT',
107
        'TD',
108
        'UD',
109
        'V100', 'V200', 'V300', 'VC', 'VD', 'VetMB', 'VN', 'VRD'
110
    ];
111
112
    // Matches two-letter words without vowels (plus "Aj") followed by whitespace; these are treated as initials.
113
    private const INITIAL_NAME_REGEX = '\b(Aj|[bcdfghjklmnpqrstvwxyzBCDFGHJKLMNPQRSTVWXYZ]{2})\s';
114
115
    // Two-letter words that match the initials pattern but must keep their normal capitalization.
116
    private const INITIAL_NAME_EXCEPTIONS = [
117
        'Mr',
118
        'Ms', // Replaces Member of the Senedd post nominal.
119
        'Dr',
120
        'St',
121
        'Jr',
122
        'Sr',
123
        'Lt', // Replaces Lady of the Order of the Thistle post nominal.
124
    ];
125
    private const LOWER_CASE_WORDS = ['The', 'Of', 'And'];
126
127
    // Post-nominals excluded from case fixing (filled by excludePostNominals()).
128
    private static $postNominalsExcluded = [];
129
130
    // Default options.
131
    private static $options = [
132
        'lazy' => true,
133
        'irish' => true,
134
        'spanish' => false,
135
        'roman' => true,
136
        'hebrew' => true,
137
        'postnominal' => true,
138
    ];
139
140
    /**
     * Create a formatter and apply the supplied option overrides.
     *
     * The overrides are handed to setOptions(), which stores them in the
     * class-level (static) option set rather than on this instance.
     *
     * @param array $options Option overrides keyed by option name.
     */
    public function __construct(array $options = [])
    {
        static::setOptions($options);
    }
149
150
    /**
     * Merge option overrides into the class-level option set.
     *
     * Values in $options replace the stored values that share the same key;
     * the result is kept in a static property and therefore applies to
     * every later call that reads the options.
     *
     * @param array $options Option overrides keyed by option name.
     */
    public static function setOptions(array $options): void
    {
        $merged = array_merge(self::$options, $options);

        self::$options = $merged;
    }
159
160
    /**
     * Register post-nominals that must be left out of post-nominal case fixing.
     *
     * A single string is treated as a one-element list. The values are
     * appended to the class-level exclusion list.
     *
     * @param array|string $values One post-nominal or a list of them.
     * @return boolean|void False when $values is neither a string nor an array; nothing otherwise.
     */
    public static function excludePostNominals($values)
    {
        if (is_string($values)) {
            $values = [$values];
        } elseif ( ! is_array($values)) {
            return false;
        }

        self::$postNominalsExcluded = array_merge(self::$postNominalsExcluded, $values);
    }
173
174
    /**
     * Main entry point: normalize the letter case of a personal name.
     *
     * The supplied options are merged into the class-level option set via
     * setOptions(), so they stay in effect for later calls as well.
     *
     * @param string|null $name    Name to format; null is treated as ''.
     * @param array|null  $options Option overrides; null is treated as "no overrides".
     *
     * @return string The formatted name, or the input unchanged when
     *                canBeProcessed() rejects it.
     */
    public static function nameCase(?string $name = '', ?array $options = []): string
    {
        $name = $name ?? '';

        // setOptions() only accepts arrays, so a null $options must be mapped to "no overrides".
        self::setOptions($options ?? []);

        // Do not do anything if the string is empty, or is mixed case while the lazy option is on.
        if ( ! self::canBeProcessed($name)) {
            return $name;
        }

        // Capitalize
        $name = self::capitalize($name);

        foreach (self::getReplacements() as $pattern => $replacement) {
            $replaced = mb_ereg_replace($pattern, $replacement, $name);

            // mb_ereg_replace() signals failure with false/null; keep the current name in that case
            // instead of passing a non-string on to the string-typed helpers below.
            if (is_string($replaced)) {
                $name = $replaced;
            }
        }

        $name = self::correctInitialNames($name);
        $name = self::correctLowerCaseWords($name);

        return self::processOptions($name);
    }
205
206
    /**
     * Decide whether a name should be run through the formatter.
     *
     * An empty string is never processed. Otherwise the name is rejected
     * only when the 'lazy' option is on and skipMixed() reports it as mixed case.
     *
     * @param string $name
     *
     * @return bool True when the name should be formatted.
     */
    private static function canBeProcessed(string $name): bool
    {
        if ($name === '') {
            return false;
        }

        $skip = self::$options['lazy'] && self::skipMixed($name);

        return ! $skip;
    }
221
222
    /**
     * Tell whether a name is mixed case and should therefore be skipped.
     *
     * A name is NOT skipped when its first character is already lower case,
     * or when the whole string is entirely lower case or entirely upper case.
     *
     * @param string $name
     *
     * @return bool True when the name starts with a non-lowercase character and mixes cases.
     */
    private static function skipMixed(string $name): bool
    {
        // Take the first *character*, not the first byte: $name[0] would split a
        // multibyte letter (e.g. "é") and make the lower-case check fail.
        $firstLetter = mb_substr($name, 0, 1);

        $firstLetterLower = $firstLetter === mb_strtolower($firstLetter);
        $allLowerOrUpper = mb_strtolower($name) === $name || mb_strtoupper($name) === $name;

        return ! ($firstLetterLower || $allLowerOrUpper);
    }
236
237
    /**
     * Lower-case the whole name, then upper-case the first character of every word.
     *
     * A single word character that follows an apostrophe and ends a word
     * (as in "'s") is put back to lower case. The result is finally passed
     * through updateIrish().
     *
     * @param string $name
     *
     * @return string
     */
    private static function capitalize(string $name): string
    {
        $toUpper = function ($found) {
            return mb_strtoupper($found[0]);
        };
        $toLower = function ($found) {
            return mb_strtolower($found[0]);
        };

        $lowered = mb_strtolower($name);

        // Upper-case the first character of each word.
        $capitalized = mb_ereg_replace_callback('\b\w', $toUpper, $lowered);

        // Lowercase 's
        $capitalized = mb_ereg_replace_callback('\'\w\b', $toLower, $capitalized);

        return self::updateIrish($capitalized);
    }
259
260
    /**
     * Apply "Mac"/"Mc" capitalization when the 'irish' option is enabled.
     *
     * updateMac() runs only if the name contains a matching "Mac..." word or
     * any word starting with "Mc". "Macmurdo" is always rewritten to
     * "MacMurdo" afterwards.
     *
     * @param string $name
     *
     * @return string
     */
    private static function updateIrish(string $name): string
    {
        if ( ! self::$options['irish']) {
            return $name;
        }

        // mb_ereg_match() anchors at the start of the string, hence the leading ".*?".
        $needsMacFix = mb_ereg_match('.*?\bMac[A-Za-z]{2,}[^aciozj]\b', $name)
            || mb_ereg_match('.*?\bMc', $name);

        if ($needsMacFix) {
            $name = self::updateMac($name);
        }

        return mb_ereg_replace('Macmurdo', 'MacMurdo', $name);
    }
280
281
    /**
     * Capitalize the letter after a leading "Mac" or "Mc", then undo known exceptions.
     *
     * @param string $name
     *
     * @return string
     */
    private static function updateMac(string $name): string
    {
        $capitalizeAfterPrefix = function ($parts) {
            $prefix = $parts[1];
            $rest = $parts[2];

            return $prefix . mb_strtoupper(mb_substr($rest, 0, 1)) . mb_substr($rest, 1);
        };

        $fixed = mb_ereg_replace_callback('\b(Ma?c)([A-Za-z]+)', $capitalizeAfterPrefix, $name);

        // Now fix "Mac" exceptions
        foreach (self::EXCEPTIONS as $wrong => $right) {
            $fixed = mb_ereg_replace($wrong, $right, $fixed);
        }

        return $fixed;
    }
301
302
    /**
     * Build the pattern => replacement map for the current options.
     *
     * The general replacements are always included. The "el"/"la" rules are
     * added when the 'spanish' option is off, and the "ben"/"bat" rules when
     * the 'hebrew' option is on.
     *
     * @return array
     */
    private static function getReplacements(): array
    {
        // General fixes
        $sets = [self::REPLACEMENTS];

        if ( ! self::$options['spanish']) {
            $sets[] = self::SPANISH;
        }

        if (self::$options['hebrew']) {
            $sets[] = self::HEBREW;
        }

        return array_merge(...$sets);
    }
321
322
    /**
     * Upper-case two-letter initials such as JJ and TJ.
     *
     * Every match of INITIAL_NAME_REGEX is upper-cased unless its captured
     * word is listed in INITIAL_NAME_EXCEPTIONS, in which case it is kept as is.
     *
     * @param string $name
     *
     * @return string
     */
    private static function correctInitialNames(string $name): string
    {
        $fixCase = function ($found) {
            $isException = in_array($found[1], self::INITIAL_NAME_EXCEPTIONS, true);

            return $isException ? $found[0] : mb_strtoupper($found[0]);
        };

        return mb_ereg_replace_callback(self::INITIAL_NAME_REGEX, $fixCase, $name);
    }
341
342
    /**
     * Lower-case the words listed in LOWER_CASE_WORDS wherever they occur as whole words.
     *
     * @param string $name
     *
     * @return string
     */
    private static function correctLowerCaseWords(string $name): string
    {
        return array_reduce(
            self::LOWER_CASE_WORDS,
            function ($current, $word) {
                return mb_ereg_replace('\b' . $word . '\b', mb_strtolower($word), $current);
            },
            $name
        );
    }
356
357
    /**
     * Run the option-dependent fix-ups on a name.
     *
     * In order: roman numerals ('roman'), Spanish conjunctions ('spanish')
     * and post-nominals ('postnominal'); each step runs only when its
     * option is truthy.
     *
     * @param string $name
     *
     * @return string
     */
    private static function processOptions(string $name): string
    {
        // Option name => private fixer method; array order is the execution order.
        $steps = [
            'roman' => 'updateRoman',
            'spanish' => 'fixConjunction',
            'postnominal' => 'fixPostNominal',
        ];

        foreach ($steps as $option => $method) {
            if (self::$options[$option]) {
                $name = self::$method($name);
            }
        }

        return $name;
    }
380
381
    /**
     * Upper-case every match of ROMAN_REGEX (roman numerals) in the name.
     *
     * @param string $name
     *
     * @return string
     */
    private static function updateRoman(string $name): string
    {
        $toUpper = function ($numeral) {
            return mb_strtoupper($numeral[0]);
        };

        return mb_ereg_replace_callback(self::ROMAN_REGEX, $toUpper, $name);
    }
394
395
    /**
     * Lower-case the Spanish conjunctions listed in CONJUNCTIONS when they stand alone as words.
     *
     * @param string $name
     *
     * @return string
     */
    private static function fixConjunction(string $name): string
    {
        $result = $name;

        foreach (self::CONJUNCTIONS as $word) {
            $pattern = '\b' . $word . '\b';
            $result = mb_ereg_replace($pattern, mb_strtolower($word), $result);
        }

        return $result;
    }
409
410
    /**
     * Restore the canonical letter case of post-nominals.
     *
     * Every entry of POST_NOMINALS that is not in the exclusion list is
     * matched case-insensitively as a standalone token and replaced with its
     * canonical spelling.
     *
     * @param string $name
     * @return string
     */
    private static function fixPostNominal(string $name): string
    {
        $postNominals = array_diff(self::POST_NOMINALS, self::$postNominalsExcluded);

        foreach ($postNominals as $postNominal) {
            // Escape regex metacharacters so entries such as "MCh." or "FCSP." match their
            // literal dot instead of any character. Lookarounds replace \b because \b cannot
            // match after a trailing "."; for entries made of word characters they are equivalent.
            $pattern = '(?<!\w)' . preg_quote($postNominal) . '(?!\w)';

            $name = mb_ereg_replace($pattern, $postNominal, $name, 'ix');
        }

        return $name;
    }
424
}
425