Completed
Push — master ( 0edb55...000ee9 )
by Yuri
07:41
created

Formatter   A

Complexity

Total Complexity 34

Size/Duplication

Total Lines 356
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 0

Test Coverage

Coverage 100%

Importance

Changes 0
Metric Value
wmc 34
lcom 1
cbo 0
dl 0
loc 356
ccs 64
cts 64
cp 1
rs 9.68
c 0
b 0
f 0

14 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 4 1
A setOptions() 0 4 1
A excludePostNominals() 0 7 3
A nameCase() 0 20 5
A processOptions() 0 16 4
A capitalize() 0 17 1
A getReplacements() 0 14 3
A skipMixed() 0 7 3
A updateIrish() 0 13 4
A updateRoman() 0 6 1
A updateMac() 0 13 2
A fixConjunction() 0 7 2
A correctLowerCaseWords() 0 7 2
A fixPostNominal() 0 8 2
1
<?php namespace Tamtamchik\NameCase;
2
3
/**
 * Class Formatter.
 *
 * Converts personal names to their conventional letter case
 * ("KEITH MACDONALD" => "Keith MacDonald", "louis xiv" => "Louis XIV").
 *
 * Options live in static state: anything passed to the constructor,
 * to setOptions() or to nameCase() is merged into the shared option set
 * and stays in effect for later calls.
 */
class Formatter
{
    // Irish exceptions.
    private const EXCEPTIONS = [
        '\bMacEdo'     => 'Macedo',
        '\bMacEvicius' => 'Macevicius',
        '\bMacHado'    => 'Machado',
        '\bMacHar'     => 'Machar',
        '\bMacHin'     => 'Machin',
        '\bMacHlin'    => 'Machlin',
        '\bMacIas'     => 'Macias',
        '\bMacIulis'   => 'Maciulis',
        '\bMacKie'     => 'Mackie',
        '\bMacKle'     => 'Mackle',
        '\bMacKlin'    => 'Macklin',
        '\bMacKmin'    => 'Mackmin',
        '\bMacQuarie'  => 'Macquarie',
        '\bMacOmber'   => 'Macomber',
        '\bMacIn'      => 'Macin',
        '\bMacKintosh' => 'Mackintosh',
        '\bMacKen'     => 'Macken',
        '\bMacHen'     => 'Machen',
        '\bMacisaac'   => 'MacIsaac',
        '\bMacHiel'    => 'Machiel',
        '\bMacIol'     => 'Maciol',
        '\bMacKell'    => 'Mackell',
        '\bMacKlem'    => 'Macklem',
        '\bMacKrell'   => 'Mackrell',
        '\bMacLin'     => 'Maclin',
        '\bMacKey'     => 'Mackey',
        '\bMacKley'    => 'Mackley',
        '\bMacHell'    => 'Machell',
        '\bMacHon'     => 'Machon',
    ];

    // General replacements.
    private const REPLACEMENTS = [
        '\bAl(?=\s+\w)'         => 'al',        // al Arabic or forename Al.
        '\bAp\b'                => 'ap',        // ap Welsh.
        '\b(Bin|Binti|Binte)\b' => 'bin',       // bin, binti, binte Arabic.
        '\bDell([ae])\b'        => 'dell\1',    // della and delle Italian.
        '\bD([aeiou])\b'        => 'd\1',       // da, de, di Italian; du French; do Brasil.
        '\bD([ao]s)\b'          => 'd\1',       // das, dos Brasileiros.
        '\bDe([lrn])\b'         => 'de\1',      // del Italian; der/den Dutch/Flemish.
        '\bL([eo])\b'           => 'l\1',       // lo Italian; le French.
        '\bTe([rn])\b'          => 'te\1',      // ten, ter Dutch/Flemish.
        '\bVan(?=\s+\w)'        => 'van',       // van German or forename Van.
        '\bVon\b'               => 'von',       // von Dutch/Flemish.
    ];

    // Applied only while the "spanish" option is OFF (see getReplacements()).
    private const SPANISH = [
        '\bEl\b' => 'el',        // el Greek or El Spanish.
        '\bLa\b' => 'la',        // la French or La Spanish.
    ];

    // Applied only while the "hebrew" option is ON (see getReplacements()).
    private const HEBREW = [
        '\bBen(?=\s+\w)' => 'ben', // ben Hebrew or forename Ben.
        '\bBat(?=\s+\w)' => 'bat', // bat Hebrew or forename Bat.
    ];

    // Spanish conjunctions.
    private const CONJUNCTIONS = ['Y', 'E', 'I'];

    // Roman letters regexp.
    private const ROMAN_REGEX = '\b((?:[Xx]{1,3}|[Xx][Ll]|[Ll][Xx]{0,3})?(?:[Ii]{1,3}|[Ii][VvXx]|[Vv][Ii]{0,3})?)\b';

    // Post nominal values.
    private const POST_NOMINALS = [
        'ACILEx', 'ACSM', 'ADC', 'AEPC', 'AFC', 'AFM', 'AICSM', 'AKC', 'AM', 'ARBRIBA', 'ARCS', 'ARRC', 'ARSM', 'AUH', 'AUS',
        'BA', 'BArch', 'BCh', 'BChir', 'BCL', 'BDS', 'BEd', 'BEM', 'BEng', 'BM', 'BS', 'BSc', 'BSW', 'BVM&S', 'BVScBVetMed',
        'CB', 'CBE', 'CEng', 'CertHE', 'CGC', 'CGM', 'CH', 'CIE', 'CMarEngCMarSci', 'CMarTech', 'CMG', 'CMILT', 'CML', 'CPhT', 'CPLCTP', 'CPM', 'CQSW', 'CSciTeach', 'CSI', 'CTL', 'CVO',
        'DBE', 'DBEnv', 'DC', 'DCB', 'DCM', 'DCMG', 'DConstMgt', 'DCVO', 'DD', 'DEM', 'DFC', 'DFM', 'DIC', 'Dip', 'DipHE', 'DipLP', 'DipSW', 'DL', 'DLitt', 'DLP', 'DPhil', 'DProf', 'DPT', 'DREst', 'DSC', 'DSM', 'DSO', 'DSocSci',
        'ED', 'EdD', 'EJLog', 'EMLog', 'EN', 'EngD', 'EngTech', 'ERD', 'ESLog',
        'FADO', 'FAWM', 'FBDOFCOptom', 'FCEM', 'FCILEx', 'FCILT', 'FCSP.', 'FdAFdSc', 'FdEng', 'FFHOM', 'FFPM', 'FRCAFFPMRCA', 'FRCGP', 'FRCOG', 'FRCP', 'FRCPsych', 'FRCS', 'FRCVS', 'FSCR.',
        'GBE', 'GC', 'GCB', 'GCIE', 'GCILEx', 'GCMG', 'GCSI', 'GCVO', 'GM',
        'HNC', 'HNCert', 'HND', 'HNDip',
        'ICTTech', 'IDSM', 'IEng', 'IMarEng', 'IOMCPM', 'ISO',
        'J', 'JP', 'JrLog',
        'KBE', 'KC', 'KCB', 'KCIE', 'KCMG', 'KCSI', 'KCVO', 'KG', 'KP', 'KT',
        'LFHOM', 'LG', 'LJ', 'LLB', 'LLD', 'LLM', 'Log', 'LPE', 'LT', 'LVO',
        'MA', 'MAcc', 'MAnth', 'MArch', 'MarEngTech', 'MB', 'MBA', 'MBChB', 'MBE', 'MBEIOM', 'MBiochem', 'MC', 'MCEM', 'MCGI', 'MCh.', 'MChem', 'MChiro', 'MClinRes', 'MComp', 'MCOptom', 'MCSM', 'MCSP', 'MD', 'MEarthSc', 'MEng', 'MEnt', 'MEP', 'MFHOM', 'MFin', 'MFPM', 'MGeol', 'MILT', 'MJur', 'MLA', 'MLitt', 'MM', 'MMath', 'MMathStat', 'MMORSE', 'MMus', 'MOst', 'MP', 'MPAMEd', 'MPharm', 'MPhil', 'MPhys', 'MRCGP', 'MRCOG', 'MRCP', 'MRCPath', 'MRCPCHFRCPCH', 'MRCPsych', 'MRCS', 'MRCVS', 'MRes', 'MS', 'MSc', 'MScChiro', 'MSci', 'MSCR', 'MSM', 'MSocSc', 'MSP', 'MSt', 'MSW', 'MSYP', 'MVO',
        'NPQH',
        'OBE', 'OBI', 'OM', 'OND',
        'PgC', 'PGCAP', 'PGCE', 'PgCert', 'PGCHE', 'PgCLTHE', 'PgD', 'PGDE', 'PgDip', 'PhD', 'PLog', 'PLS',
        'QAM', 'QC', 'QFSM', 'QGM', 'QHC', 'QHDS', 'QHNS', 'QHP', 'QHS', 'QPM', 'QS', 'QTSCSci',
        'RD', 'RFHN', 'RGN', 'RHV', 'RIAI', 'RIAS', 'RM', 'RMN', 'RN', 'RN1RNA', 'RN2', 'RN3', 'RN4', 'RN5', 'RN6', 'RN7', 'RN8', 'RN9', 'RNC', 'RNLD', 'RNMH', 'ROH', 'RRC', 'RSAW', 'RSci', 'RSciTech', 'RSCN', 'RSN', 'RVM', 'RVN',
        'SCHM', 'SCJ', 'SCLD', 'SEN', 'SGM', 'SL', 'SPANSPMH', 'SPCC', 'SPCN', 'SPDN', 'SPHP', 'SPLD', 'SrLog', 'SRN', 'SROT',
        'TD',
        'UD',
        'V100', 'V200', 'V300', 'VC', 'VD', 'VetMB', 'VN', 'VRD'
    ];

    // Excluded post-nominals
    private static $postNominalsExcluded = [];

    // Lowercase words
    private const LOWER_CASE_WORDS = ['The', 'Of', 'And'];

    // Default options.
    private static $options = [
        'lazy'        => true,
        'irish'       => true,
        'spanish'     => false,
        'roman'       => true,
        'hebrew'      => true,
        'postnominal' => true,
    ];

    /**
     * Formatter constructor.
     *
     * The given options are merged into the shared static option set.
     *
     * @param array $options
     */
    public function __construct($options = [])
    {
        $this->setOptions($options);
    }

    /**
     * Global options setter.
     *
     * Merges the given options over the current static options; keys that
     * are not passed keep their current value.
     *
     * @param array $options
     */
    public static function setOptions($options): void
    {
        self::$options = array_merge(self::$options, $options);
    }

    /**
     * Global post-nominals exclusions setter.
     *
     * Appends the given value(s) to the static exclusion list that
     * fixPostNominal() subtracts from the built-in post-nominal list.
     *
     * @param array|string $values a single post-nominal or a list of them
     * @return bool|void false when $values is neither a string nor an array,
     *                   nothing otherwise
     */
    public static function excludePostNominals($values)
    {
        if (is_string($values)) {
            $values = [$values];
        }
        if (!is_array($values)) {
            return false;
        }

        self::$postNominalsExcluded = array_merge(self::$postNominalsExcluded, $values);
    }

    /**
     * Main function for NameCase.
     *
     * @param string $name
     * @param array $options merged into the static options before processing
     *
     * @return string
     */
    public static function nameCase($name = '', array $options = []): string
    {
        // Loose comparison is kept on purpose so every "empty" input takes the
        // early exit; the cast makes sure a null input yields '' instead of
        // violating the string return type.
        if ($name == '') {
            return (string) $name;
        }

        self::setOptions($options);

        // Do not do anything if string is mixed and lazy option is true.
        if (self::$options['lazy'] && self::skipMixed($name)) {
            return $name;
        }

        // Capitalize
        $name = self::capitalize($name);

        foreach (self::getReplacements() as $pattern => $replacement) {
            $name = mb_ereg_replace($pattern, $replacement, $name);
        }

        $name = self::correctLowerCaseWords($name);

        return self::processOptions($name);
    }

    /**
     * Process options with given name
     *
     * Runs the optional passes in a fixed order: roman numerals,
     * Spanish conjunctions, post-nominals.
     *
     * @param string $name
     *
     * @return string
     */
    private static function processOptions(string $name): string
    {
        if (self::$options['roman']) {
            $name = self::updateRoman($name);
        }

        if (self::$options['spanish']) {
            $name = self::fixConjunction($name);
        }

        if (self::$options['postnominal']) {
            $name = self::fixPostNominal($name);
        }

        return $name;
    }

    /**
     * Capitalize first letters.
     *
     * Lowercases the whole string, uppercases the first character of every
     * word, re-lowercases a single letter following an apostrophe
     * (possessive 's) and finally applies the Irish Mac/Mc rules.
     *
     * @param string $name
     *
     * @return string
     */
    private static function capitalize(string $name): string
    {
        $name = mb_strtolower($name);

        $name = mb_ereg_replace_callback('\b\w', function ($matches) {
            return mb_strtoupper($matches[0]);
        }, $name);

        // Lowercase 's
        $name = mb_ereg_replace_callback('\'\w\b', function ($matches) {
            return mb_strtolower($matches[0]);
        }, $name);

        $name = self::updateIrish($name);

        return $name;
    }

    /**
     * Define required replacements.
     *
     * @return array pattern => replacement map built from REPLACEMENTS plus
     *               the option-dependent SPANISH and HEBREW sets
     */
    private static function getReplacements(): array
    {
        // General fixes
        $replacements = self::REPLACEMENTS;

        // "El"/"La" are lowercased only when the name is NOT treated as Spanish.
        if ( ! self::$options['spanish']) {
            $replacements = array_merge($replacements, self::SPANISH);
        }

        if (self::$options['hebrew']) {
            $replacements = array_merge($replacements, self::HEBREW);
        }

        return $replacements;
    }

    /**
     * Skip if string is mixed case.
     *
     * A name is considered "mixed" (and left alone in lazy mode) when it does
     * not start with a lower-case letter and is neither entirely lower-case
     * nor entirely upper-case.
     *
     * @param string $name
     *
     * @return bool true when the name should be skipped
     */
    private static function skipMixed(string $name): bool
    {
        // Take the first CHARACTER, not the first byte: $name[0] would split a
        // multibyte letter such as "é" and never compare equal to its
        // lower-case form.
        $firstLetter = mb_substr($name, 0, 1);

        $firstLetterLower = $firstLetter === mb_strtolower($firstLetter);
        $allLowerOrUpper = (mb_strtolower($name) === $name || mb_strtoupper($name) === $name);

        return ! ($firstLetterLower || $allLowerOrUpper);
    }

    /**
     * Update for Irish names.
     *
     * @param string $name
     *
     * @return string
     */
    private static function updateIrish(string $name): string
    {
        if ( ! self::$options['irish']) {
            return $name;
        }

        if (
            mb_ereg_match('.*?\bMac[A-Za-z]{2,}[^aciozj]\b', $name) ||
            mb_ereg_match('.*?\bMc', $name)
        ) {
            $name = self::updateMac($name);
        }

        return mb_ereg_replace('Macmurdo', 'MacMurdo', $name);
    }

    /**
     * Fix roman numeral names.
     *
     * Uppercases every match of ROMAN_REGEX.
     *
     * @param string $name
     *
     * @return string
     */
    private static function updateRoman(string $name): string
    {
        return mb_ereg_replace_callback(self::ROMAN_REGEX, function ($matches) {
            return mb_strtoupper($matches[0]);
        }, $name);
    }

    /**
     * Updates irish Mac & Mc.
     *
     * Uppercases the letter following a leading "Mac"/"Mc", then reverts the
     * names listed in EXCEPTIONS.
     *
     * @param string $name
     *
     * @return string
     */
    private static function updateMac(string $name): string
    {
        $name = mb_ereg_replace_callback('\b(Ma?c)([A-Za-z]+)', function ($matches) {
            return $matches[1] . mb_strtoupper(mb_substr($matches[2], 0, 1)) . mb_substr($matches[2], 1);
        }, $name);

        // Now fix "Mac" exceptions
        foreach (self::EXCEPTIONS as $pattern => $replacement) {
            $name = mb_ereg_replace($pattern, $replacement, $name);
        }

        return $name;
    }

    /**
     * Fix Spanish conjunctions.
     *
     * Lowercases each stand-alone word listed in CONJUNCTIONS.
     *
     * @param string $name
     *
     * @return string
     */
    private static function fixConjunction(string $name): string
    {
        foreach (self::CONJUNCTIONS as $conjunction) {
            $name = mb_ereg_replace('\b' . $conjunction . '\b', mb_strtolower($conjunction), $name);
        }

        return $name;
    }

    /**
     * Correct lower-case words of titles.
     *
     * Lowercases each stand-alone word listed in LOWER_CASE_WORDS.
     *
     * @param string $name
     *
     * @return string
     */
    private static function correctLowerCaseWords(string $name): string
    {
        foreach (self::LOWER_CASE_WORDS as $lowercase) {
            $name = mb_ereg_replace('\b' . $lowercase . '\b', mb_strtolower($lowercase), $name);
        }

        return $name;
    }

    /**
     * Fix post-nominal letter cases.
     *
     * Every post-nominal that is not excluded is matched case-insensitively
     * and rewritten with the exact casing from POST_NOMINALS.
     *
     * @param string $name
     * @return string
     */
    private static function fixPostNominal(string $name): string
    {
        $postNominals = array_diff(self::POST_NOMINALS, self::$postNominalsExcluded);

        foreach ($postNominals as $postNominal) {
            // Quote the entry so that e.g. the "." in "MCh." is a literal dot
            // rather than "any character" (which rewrote "Mcha" to "MCh.").
            // The right edge uses (?!\w) instead of \b: identical for entries
            // ending in a letter/digit, but it also lets entries ending in "."
            // match before a space or the end of the string.
            $pattern = '\b' . preg_quote($postNominal) . '(?!\w)';
            $name = mb_ereg_replace($pattern, $postNominal, $name, 'ix');
        }

        return $name;
    }
}
362