Passed
Pull Request — master (#21)
by Yuri
04:07
created

Formatter::adjustHTMLEntities()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 5
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 4
CRAP Score 1

Importance

Changes 1
Bugs 1 Features 0
Metric Value
cc 1
eloc 3
c 1
b 1
f 0
nc 1
nop 1
dl 0
loc 5
ccs 4
cts 4
cp 1
crap 1
rs 10
1
<?php namespace Tamtamchik\NameCase;
2
3
/**
 * Class Formatter.
 *
 * Re-cases personal names ("KEITH MACDONALD" -> "Keith MacDonald") using a
 * pipeline of multibyte regular-expression replacements.
 *
 * Options and post-nominal exclusions are held in static properties, so every
 * call to setOptions(), nameCase() (with options) or the constructor changes
 * the configuration for all subsequent calls in the same process.
 */
class Formatter
{
    // "Mac" surnames that must NOT get an inner capital (applied after updateMac()).
    private const EXCEPTIONS = [
        '\bMacEdo' => 'Macedo',
        '\bMacEvicius' => 'Macevicius',
        '\bMacHado' => 'Machado',
        '\bMacHar' => 'Machar',
        '\bMacHin' => 'Machin',
        '\bMacHlin' => 'Machlin',
        '\bMacIas' => 'Macias',
        '\bMacIulis' => 'Maciulis',
        '\bMacKie' => 'Mackie',
        '\bMacKle' => 'Mackle',
        '\bMacKlin' => 'Macklin',
        '\bMacKmin' => 'Mackmin',
        '\bMacQuarie' => 'Macquarie',
        '\bMacOmber' => 'Macomber',
        '\bMacIn' => 'Macin',
        '\bMacKintosh' => 'Mackintosh',
        '\bMacKen' => 'Macken',
        '\bMacHen' => 'Machen',
        '\bMacisaac' => 'MacIsaac',
        '\bMacHiel' => 'Machiel',
        '\bMacIol' => 'Maciol',
        '\bMacKell' => 'Mackell',
        '\bMacKlem' => 'Macklem',
        '\bMacKrell' => 'Mackrell',
        '\bMacLin' => 'Maclin',
        '\bMacKey' => 'Mackey',
        '\bMacKley' => 'Mackley',
        '\bMacHell' => 'Machell',
        '\bMacHon' => 'Machon',
    ];

    // General replacements (pattern => replacement), always applied.
    private const REPLACEMENTS = [
        '\bAl(?=\s+\w)' => 'al',        // al Arabic or forename Al.
        '\bAp\b' => 'ap',        // ap Welsh.
        '\b(Bin|Binti|Binte)\b' => 'bin',       // bin, binti, binte Arabic.
        '\bDell([ae])\b' => 'dell\1',    // della and delle Italian.
        '\bD([aeiou])\b' => 'd\1',       // da, de, di Italian; du French; do Brasil.
        '\bD([ao]s)\b' => 'd\1',       // das, dos Brasileiros.
        '\bDe([lrn])\b' => 'de\1',      // del Italian; der/den Dutch/Flemish.
        '\bL([eo])\b' => 'l\1',       // lo Italian; le French.
        '\bTe([rn])\b' => 'te\1',      // ten, ter Dutch/Flemish.
        '\bVan(?=\s+\w)' => 'van',       // van Dutch/Flemish or forename Van.
        '\bVon\b' => 'von',       // von German.
    ];

    // Applied only when the "spanish" option is OFF (see getReplacements()).
    private const SPANISH = [
        '\bEl\b' => 'el',        // el Greek or El Spanish.
        '\bLa\b' => 'la',        // la French or La Spanish.
    ];

    // Applied only when the "hebrew" option is ON.
    private const HEBREW = [
        '\bBen(?=\s+\w)' => 'ben', // ben Hebrew or forename Ben.
        '\bBat(?=\s+\w)' => 'bat', // bat Hebrew or forename Bat.
    ];

    // Spanish conjunctions.
    private const CONJUNCTIONS = ['Y', 'E', 'I'];

    // Roman letters regexp.
    private const ROMAN_REGEX = '\b((?:[Xx]{1,3}|[Xx][Ll]|[Ll][Xx]{0,3})?(?:[Ii]{1,3}|[Ii][VvXx]|[Vv][Ii]{0,3})?)\b';

    // Post nominal values.
    private const POST_NOMINALS = [
        'ACILEx', 'ACSM', 'ADC', 'AEPC', 'AFC', 'AFM', 'AICSM', 'AKC', 'AM', 'ARBRIBA', 'ARCS', 'ARRC', 'ARSM', 'AUH',
        'AUS',
        'BA', 'BArch', 'BCh', 'BChir', 'BCL', 'BDS', 'BEd', 'BEM', 'BEng', 'BM', 'BS', 'BSc', 'BSW', 'BVM&S',
        'BVScBVetMed',
        'CB', 'CBE', 'CEng', 'CertHE', 'CGC', 'CGM', 'CH', 'CIE', 'CMarEngCMarSci', 'CMarTech', 'CMG', 'CMILT',
        'CML', 'CPhT', 'CPLCTP', 'CPM', 'CQSW', 'CSciTeach', 'CSI', 'CTL', 'CVO',
        'DBE', 'DBEnv', 'DC', 'DCB', 'DCM', 'DCMG', 'DConstMgt', 'DCVO', 'DD', 'DEM', 'DFC', 'DFM', 'DIC', 'Dip',
        'DipHE', 'DipLP', 'DipSW', 'DL', 'DLitt', 'DLP', 'DPhil', 'DProf', 'DPT', 'DREst', 'DSC', 'DSM', 'DSO',
        'DSocSci',
        'ED', 'EdD', 'EJLog', 'EMLog', 'EN', 'EngD', 'EngTech', 'ERD', 'ESLog',
        'FADO', 'FAWM', 'FBDOFCOptom', 'FCEM', 'FCILEx', 'FCILT', 'FCSP.', 'FdAFdSc', 'FdEng', 'FFHOM', 'FFPM',
        'FRCAFFPMRCA', 'FRCGP', 'FRCOG', 'FRCP', 'FRCPsych', 'FRCS', 'FRCVS', 'FSCR.',
        'GBE', 'GC', 'GCB', 'GCIE', 'GCILEx', 'GCMG', 'GCSI', 'GCVO', 'GM',
        'HNC', 'HNCert', 'HND', 'HNDip',
        'ICTTech', 'IDSM', 'IEng', 'IMarEng', 'IOMCPM', 'ISO',
        'J', 'JP', 'JrLog',
        'KBE', 'KC', 'KCB', 'KCIE', 'KCMG', 'KCSI', 'KCVO', 'KG', 'KP', 'KT',
        'LFHOM', 'LG', 'LJ', 'LLB', 'LLD', 'LLM', 'Log', 'LPE', /* 'LT', - excluded, see initial names */
        'LVO',
        'MA', 'MAcc', 'MAnth', 'MArch', 'MarEngTech', 'MB', 'MBA', 'MBChB', 'MBE', 'MBEIOM', 'MBiochem', 'MC', 'MCEM',
        'MCGI', 'MCh.', 'MChem', 'MChiro', 'MClinRes', 'MComp', 'MCOptom', 'MCSM', 'MCSP', 'MD', 'MEarthSc',
        'MEng', 'MEnt', 'MEP', 'MFHOM', 'MFin', 'MFPM', 'MGeol', 'MILT', 'MJur', 'MLA', 'MLitt', 'MM', 'MMath',
        'MMathStat', 'MMORSE', 'MMus', 'MOst', 'MP', 'MPAMEd', 'MPharm', 'MPhil', 'MPhys', 'MRCGP', 'MRCOG',
        'MRCP', 'MRCPath', 'MRCPCHFRCPCH', 'MRCPsych', 'MRCS', 'MRCVS', 'MRes',
        /* 'MS', - excluded, see initial names */
        'MSc', 'MScChiro', 'MSci',
        'MSCR', 'MSM', 'MSocSc', 'MSP', 'MSt', 'MSW', 'MSYP', 'MVO',
        'NPQH',
        'OBE', 'OBI', 'OM', 'OND',
        'PgC', 'PGCAP', 'PGCE', 'PgCert', 'PGCHE', 'PgCLTHE', 'PgD', 'PGDE', 'PgDip', 'PhD', 'PLog', 'PLS',
        'QAM', 'QC', 'QFSM', 'QGM', 'QHC', 'QHDS', 'QHNS', 'QHP', 'QHS', 'QPM', 'QS', 'QTSCSci',
        'RD', 'RFHN', 'RGN', 'RHV', 'RIAI', 'RIAS', 'RM', 'RMN', 'RN', 'RN1RNA', 'RN2', 'RN3', 'RN4', 'RN5', 'RN6', 'RN7', 'RN8', 'RN9', 'RNC', 'RNLD', 'RNMH', 'ROH', 'RRC', 'RSAW', 'RSci', 'RSciTech', 'RSCN', 'RSN', 'RVM', 'RVN',
        'SCHM', 'SCJ', 'SCLD', 'SEN', 'SGM', 'SL', 'SPANSPMH', 'SPCC', 'SPCN', 'SPDN', 'SPHP', 'SPLD', 'SrLog', 'SRN', 'SROT',
        'TD',
        'UD',
        'V100', 'V200', 'V300', 'VC', 'VD', 'VetMB', 'VN', 'VRD'
    ];

    // Most two-letter words with no vowels should be kept in all caps as initials.
    // The match includes the trailing whitespace character.
    private const INITIAL_NAME_REGEX = '\b(Aj|[bcdfghjklmnpqrstvwxyzBCDFGHJKLMNPQRSTVWXYZ]{2})\s';

    // Two-consonant words that are titles/suffixes, not initials, and keep their casing.
    private const INITIAL_NAME_EXCEPTIONS = [
        'Mr',
        'Ms', // Replaces Member of the Senedd post nominal.
        'Dr',
        'St',
        'Jr',
        'Sr',
        'Lt', // Replaces Lady of the Order of the Thistle post nominal.
    ];

    // Lowercase words
    private const LOWER_CASE_WORDS = ['The', 'Of', 'And'];

    // Excluded post-nominals (accumulated by excludePostNominals()).
    private static $postNominalsExcluded = [];

    // Default options.
    private static $options = [
        'lazy' => true,
        'irish' => true,
        'spanish' => false,
        'roman' => true,
        'hebrew' => true,
        'postnominal' => true,
    ];

    /**
     * Formatter constructor.
     *
     * Merges $options into the shared static option set; the instance itself
     * holds no state.
     *
     * @param array $options
     */
    public function __construct(array $options = [])
    {
        static::setOptions($options);
    }

    /**
     * Global options setter.
     *
     * Merges the given keys over the current static options; keys not present
     * in $options keep their current value.
     *
     * @param array $options
     */
    public static function setOptions(array $options): void
    {
        self::$options = array_merge(self::$options, $options);
    }

    /**
     * Global post-nominals exclusions setter.
     *
     * Appends to the static exclusion list; there is no reset in this class.
     *
     * @param array|string|null $values A single post-nominal or a list of them.
     *
     * @return boolean|void false when $values is neither a string nor an array;
     *                      otherwise nothing is returned.
     */
    public static function excludePostNominals($values)
    {
        if (is_string($values)) {
            $values = [$values];
        }

        if ( ! is_array($values)) {
            return false;
        }

        self::$postNominalsExcluded = array_merge(self::$postNominalsExcluded, $values);
    }

    /**
     * Main function for NameCase.
     *
     * @param string|null $name    Name to re-case; null is treated as ''.
     * @param array|null  $options Options merged into the global option set
     *                             before processing; null is treated as [].
     *
     * @return string The re-cased name, or the input unchanged when it is
     *                empty, skipped by the "lazy" option, or a replacement fails.
     */
    public static function nameCase(?string $name = '', ?array $options = []): string
    {
        $name = $name ?? '';

        // $options is declared nullable, but setOptions() requires an array.
        self::setOptions($options ?? []);

        // Do not do anything if string is mixed and lazy option is true.
        if ( ! self::canBeProcessed($name)) {
            return $name;
        }

        $original = $name;

        // Capitalize
        self::capitalize($name);

        foreach (self::getReplacements() as $pattern => $replacement) {
            $replaced = mb_ereg_replace($pattern, $replacement, $name);

            // Very difficult to write a test in modern environments
            // @codeCoverageIgnoreStart
            if ( ! is_string($replaced)) {
                return $original;
            }
            // @codeCoverageIgnoreEnd

            $name = $replaced;
        }

        self::correctInitialNames($name);
        self::correctLowerCaseWords($name);

        self::processOptions($name);

        self::adjustHTMLEntities($name);

        return $name;
    }

    /**
     * Check if string can be processed.
     *
     * @param string $name
     *
     * @return bool false for an empty string, or when "lazy" is on and the
     *              string is mixed case; true otherwise.
     */
    private static function canBeProcessed(string $name): bool
    {
        if ($name === '') {
            return false;
        }

        return ! (self::$options['lazy'] && self::skipMixed($name));
    }

    /**
     * Skip if string is mixed case.
     *
     * A string is NOT skipped when its first character is unchanged by
     * lower-casing, or when the whole string is entirely lower or upper case.
     *
     * @param string $name
     *
     * @return bool
     */
    private static function skipMixed(string $name): bool
    {
        // Take the first CHARACTER, not the first byte: $name[0] would split a
        // multibyte letter such as "é" and never compare equal to its lower-case form.
        $firstLetter = mb_substr($name, 0, 1);

        $firstLetterLower = $firstLetter === mb_strtolower($firstLetter);
        $allLowerOrUpper = (mb_strtolower($name) === $name || mb_strtoupper($name) === $name);

        return ! ($firstLetterLower || $allLowerOrUpper);
    }

    /**
     * Capitalize first letters.
     *
     * Lower-cases everything, upper-cases the first character of each word,
     * re-lowers a single trailing letter after an apostrophe ("'S" -> "'s"),
     * then applies the Irish Mac/Mc rules.
     *
     * @param string $name
     */
    private static function capitalize(string &$name): void
    {
        $name = mb_strtolower($name);

        $name = self::replaceCallback('\b\w', function ($matches) {
            return mb_strtoupper($matches[0]);
        }, $name);

        // Lowercase 's
        $name = self::replaceCallback('\'\w\b', function ($matches) {
            return mb_strtolower($matches[0]);
        }, $name);

        self::updateIrish($name);
    }

    /**
     * Update for Irish names.
     *
     * Does nothing when the "irish" option is off.
     *
     * @param string $name
     */
    private static function updateIrish(string &$name): void
    {
        if ( ! self::$options['irish']) {
            return;
        }

        if (
            mb_ereg_match('.*?\bMac[A-Za-z]{2,}[^aciozj]\b', $name) ||
            mb_ereg_match('.*?\bMc', $name)
        ) {
            self::updateMac($name);
        }

        $name = self::replace('Macmurdo', 'MacMurdo', $name);
    }

    /**
     * Updates irish Mac & Mc.
     *
     * Upper-cases the letter following every "Mac"/"Mc" prefix, then reverts
     * the known non-Irish surnames listed in EXCEPTIONS.
     *
     * @param string $name
     */
    private static function updateMac(string &$name): void
    {
        $name = self::replaceCallback('\b(Ma?c)([A-Za-z]+)', function ($matches) {
            return $matches[1] . mb_strtoupper(mb_substr($matches[2], 0, 1)) . mb_substr($matches[2], 1);
        }, $name);

        // Now fix "Mac" exceptions
        foreach (self::EXCEPTIONS as $pattern => $replacement) {
            $name = self::replace($pattern, $replacement, $name);
        }
    }

    /**
     * Define required replacements.
     *
     * @return array pattern => replacement map built from REPLACEMENTS, plus
     *               SPANISH when "spanish" is off and HEBREW when "hebrew" is on.
     */
    private static function getReplacements(): array
    {
        // General fixes
        $replacements = self::REPLACEMENTS;
        if ( ! self::$options['spanish']) {
            $replacements = array_merge($replacements, self::SPANISH);
        }

        if (self::$options['hebrew']) {
            $replacements = array_merge($replacements, self::HEBREW);
        }

        return $replacements;
    }

    /**
     * Correct capitalization of initial names like JJ and TJ.
     *
     * @param string $name
     */
    private static function correctInitialNames(string &$name): void
    {
        $name = self::replaceCallback(self::INITIAL_NAME_REGEX, function ($matches) {
            // $matches[0] includes the trailing whitespace; $matches[1] is the two letters.
            if (in_array($matches[1], self::INITIAL_NAME_EXCEPTIONS, true)) {
                return $matches[0];
            }

            return mb_strtoupper($matches[0]);
        }, $name);
    }

    /**
     * Correct lower-case words of titles.
     *
     * @param string $name
     */
    private static function correctLowerCaseWords(string &$name): void
    {
        foreach (self::LOWER_CASE_WORDS as $lowercase) {
            $name = self::replace('\b' . $lowercase . '\b', mb_strtolower($lowercase), $name);
        }
    }

    /**
     * Process options with given name
     *
     * Runs the optional passes in fixed order: roman numerals, Spanish
     * conjunctions, post-nominals.
     *
     * @param string $name
     */
    private static function processOptions(string &$name): void
    {
        if (self::$options['roman']) {
            self::updateRoman($name);
        }

        if (self::$options['spanish']) {
            self::fixConjunction($name);
        }

        if (self::$options['postnominal']) {
            self::fixPostNominal($name);
        }
    }

    /**
     * Fix roman numeral names.
     *
     * @param string $name
     */
    private static function updateRoman(string &$name): void
    {
        $name = self::replaceCallback(self::ROMAN_REGEX, function ($matches) {
            return mb_strtoupper($matches[0]);
        }, $name);
    }

    /**
     * Fix Spanish conjunctions.
     *
     * @param string $name
     */
    private static function fixConjunction(string &$name): void
    {
        foreach (self::CONJUNCTIONS as $conjunction) {
            $name = self::replace('\b' . $conjunction . '\b', mb_strtolower($conjunction), $name);
        }
    }

    /**
     * Fix post-nominal letter cases.
     *
     * Each non-excluded post-nominal is matched case-insensitively as a whole
     * word and rewritten with its canonical casing.
     *
     * NOTE(review): entries are inserted into the pattern unescaped, so the
     * trailing "." in 'MCh.', 'FCSP.' and 'FSCR.' acts as a regex wildcard
     * rather than a literal dot. Behaviour is left unchanged here; confirm the
     * intended matching before escaping.
     *
     * @param string $name
     */
    private static function fixPostNominal(string &$name): void
    {
        $postNominals = array_diff(self::POST_NOMINALS, self::$postNominalsExcluded);
        foreach ($postNominals as $postNominal) {
            $name = self::replace('\b' . $postNominal . '\b', $postNominal, $name, 'ix');
        }
    }

    /**
     * Restore lower-case HTML entities.
     *
     * Capitalization upper-cases the first letter of entity names
     * ("&amp;" -> "&Amp;"); this lower-cases every "&...;" token again.
     * Entities are not decoded.
     *
     * @param string $name
     */
    private static function adjustHTMLEntities(string &$name): void
    {
        $name = self::replaceCallback('&[a-zA-Z0-9#]+;', function ($matches) {
            return mb_strtolower($matches[0]);
        }, $name);
    }

    /**
     * mb_ereg_replace() wrapper that never yields a non-string.
     *
     * mb_ereg_replace() can return false or null on failure; in that case the
     * subject is returned unchanged so later steps keep receiving a string.
     *
     * @param string      $pattern
     * @param string      $replacement
     * @param string      $subject
     * @param string|null $options Regex options; when null the call is made
     *                             without the argument so the default applies.
     *
     * @return string
     */
    private static function replace(string $pattern, string $replacement, string $subject, ?string $options = null): string
    {
        $result = $options === null
            ? mb_ereg_replace($pattern, $replacement, $subject)
            : mb_ereg_replace($pattern, $replacement, $subject, $options);

        return is_string($result) ? $result : $subject;
    }

    /**
     * mb_ereg_replace_callback() wrapper that never yields a non-string.
     *
     * Returns the subject unchanged when the underlying call fails.
     *
     * @param string   $pattern
     * @param callable $callback Receives the match array, returns the replacement.
     * @param string   $subject
     *
     * @return string
     */
    private static function replaceCallback(string $pattern, callable $callback, string $subject): string
    {
        $result = mb_ereg_replace_callback($pattern, $callback, $subject);

        return is_string($result) ? $result : $subject;
    }
}
431