Completed
Push — master ( 36cbd2...094124 )
by Yuri
10:54
created

Formatter::processOptions()   A

Complexity

Conditions 4
Paths 8

Size

Total Lines 16

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 4
CRAP Score 4

Importance

Changes 0
Metric Value
dl 0
loc 16
ccs 4
cts 4
cp 1
rs 9.7333
c 0
b 0
f 0
cc 4
nc 8
nop 1
crap 4
1
<?php namespace Tamtamchik\NameCase;
2
3
/**
 * Class Formatter.
 *
 * Re-cases personal names: every word is lowercased and capitalised, then a
 * series of regular-expression passes restores particles (van, de, bin ...),
 * Irish Mac/Mc prefixes, roman numerals and post-nominal letters.
 *
 * All configuration lives in the static $options array, so it is shared by
 * every caller in the process.
 */
class Formatter
{
    // Irish exceptions: corrections applied after the generic Mac/Mc pass in updateMac().
    private const EXCEPTIONS = [
        '\bMacEdo'     => 'Macedo',
        '\bMacEvicius' => 'Macevicius',
        '\bMacHado'    => 'Machado',
        '\bMacHar'     => 'Machar',
        '\bMacHin'     => 'Machin',
        '\bMacHlin'    => 'Machlin',
        '\bMacIas'     => 'Macias',
        '\bMacIulis'   => 'Maciulis',
        '\bMacKie'     => 'Mackie',
        '\bMacKle'     => 'Mackle',
        '\bMacKlin'    => 'Macklin',
        '\bMacKmin'    => 'Mackmin',
        '\bMacQuarie'  => 'Macquarie',
        '\bMacOmber'   => 'Macomber',
        '\bMacIn'      => 'Macin',
        '\bMacKintosh' => 'Mackintosh',
        '\bMacKen'     => 'Macken',
        '\bMacHen'     => 'Machen',
        '\bMacisaac'   => 'MacIsaac',
        '\bMacHiel'    => 'Machiel',
        '\bMacIol'     => 'Maciol',
        '\bMacKell'    => 'Mackell',
        '\bMacKlem'    => 'Macklem',
        '\bMacKrell'   => 'Mackrell',
        '\bMacLin'     => 'Maclin',
        '\bMacKey'     => 'Mackey',
        '\bMacKley'    => 'Mackley',
        '\bMacHell'    => 'Machell',
        '\bMacHon'     => 'Machon',
    ];

    // General replacements (pattern => replacement), always applied by nameCase().
    private const REPLACEMENTS = [
        '\bAl(?=\s+\w)'         => 'al',        // al Arabic or forename Al.
        '\bAp\b'                => 'ap',        // ap Welsh.
        '\b(Bin|Binti|Binte)\b' => 'bin',       // bin, binti, binte Arabic.
        '\bDell([ae])\b'        => 'dell\1',    // della and delle Italian.
        '\bD([aeiou])\b'        => 'd\1',       // da, de, di Italian; du French; do Brasil.
        '\bD([ao]s)\b'          => 'd\1',       // das, dos Brasileiros.
        '\bDe([lrn])\b'         => 'de\1',      // del Italian; der/den Dutch/Flemish.
        '\bL([eo])\b'           => 'l\1',       // lo Italian; le French.
        '\bTe([rn])'            => 'te\1',      // ten, ter Dutch/Flemish.
        '\bVan(?=\s+\w)'        => 'van',       // van Dutch/Flemish or forename Van.
        '\bVon\b'               => 'von',       // von German.
    ];

    // Applied only while the 'spanish' option is OFF: lowercases El / La.
    private const SPANISH = [
        '\bEl\b' => 'el',        // el Greek or El Spanish.
        '\bLa\b' => 'la',        // la French or La Spanish.
    ];

    // Applied only while the 'hebrew' option is ON.
    public const HEBREW = [
        '\bBen(?=\s+\w)' => 'ben', // ben Hebrew or forename Ben.
        '\bBat(?=\s+\w)' => 'bat', // bat Hebrew or forename Bat.
    ];

    // Spanish conjunctions, lowercased when the 'spanish' option is ON.
    private const CONJUNCTIONS = ['Y', 'E', 'I'];

    // Roman letters regexp.
    private const ROMAN_REGEX = '\b((?:[Xx]{1,3}|[Xx][Ll]|[Ll][Xx]{0,3})?(?:[Ii]{1,3}|[Ii][VvXx]|[Vv][Ii]{0,3})?)\b';

    // Post nominal values, each restored to exactly this spelling by fixPostNominal().
    // NOTE(review): several entries look like two abbreviations fused together
    // (e.g. 'MBEIOM', 'IOMCPM', 'MRCPCHFRCPCH') - confirm against the source list
    // before splitting them; the data is intentionally left untouched here.
    private const POST_NOMINALS = [
        'VC', 'GC', 'KG', 'LG', 'KT', 'LT', 'KP', 'GCB', 'OM', 'GCSI', 'GCMG', 'GCIE', 'GCVO',
        'GBE', 'CH', 'KCB', 'DCB', 'KCSI', 'KCMG', 'DCMG', 'KCIE', 'KCVO', 'DCVO', 'KBE', 'DBE',
        'CB', 'CSI', 'CMG', 'CIE', 'CVO', 'CBE', 'DSO', 'LVO', 'OBE', 'ISO', 'MVO', 'MBEIOM', 'CGC',
        'RRC', 'DSC', 'MC', 'DFC', 'AFC', 'ARRC', 'OBI', 'DCM', 'CGM', 'GM', 'IDSM', 'DSM', 'MM',
        'DFM', 'AFM', 'SGM', 'IOMCPM', 'QGM', 'RVM', 'BEM', 'QPM', 'QFSM', 'QAM', 'CPM', 'MSM',
        'ERD', 'VD', 'TD', 'UD', 'ED', 'RD', 'VRD', 'AEPC', 'ADC', 'QHP', 'QHS', 'QHDS', 'QHNS',
        'QHC', 'SCJ', 'J', 'LJ', 'QS', 'SL', 'QC', 'KC', 'JP', 'DL', 'MP', 'MSP', 'MSYP', 'AM',
        'MLA', 'MEP', 'DBEnv', 'DConstMgt', 'DREst', 'EdD', 'DPhil', 'PhD', 'DLitt', 'DSocSci',
        'MD', 'EngD', 'DD', 'LLD', 'DProf', 'MA', 'MArch', 'MAnth', 'MSc', 'MMORSE', 'MMath',
        'MMathStat', 'MPharm', 'MPhil', 'MSc', 'MSci', 'MSt', 'MRes', 'MEng', 'MChem', 'MBiochem',
        'MSocSc', 'MMus', 'LLM', 'BCL', 'MPhys', 'MComp', 'MAcc', 'MFin', 'MBA', 'MPAMEd', 'MEP',
        'MEnt', 'MCGI', 'MGeol', 'MLitt', 'MEarthSc', 'MClinRes', 'BA', 'BSc', 'LLB', 'BEng',
        'MBChB', 'FdAFdSc', 'FdEng', 'PgDip', 'PgD', 'PgCert', 'PgC', 'PgCLTHE', 'AUH', 'AKC',
        'AUS', 'HNC', 'HNCert', 'HND', 'HNDip', 'DipHE', 'Dip', 'OND', 'CertHE', 'ACSM', 'MCSM',
        'DIC', 'AICSM', 'ARSM', 'ARCS', 'LLB', 'LLM', 'BCL', 'MJur', 'DPhil', 'PhD', 'LLD', 'DipLP',
        'FCILEx', 'GCILEx', 'ACILEx', 'CQSW', 'DipSW', 'BSW', 'MSW', 'FCILT', 'CMILT', 'MILT',
        'CPLCTP', 'CML', 'PLS', 'CTL', 'DLP', 'PLog', 'EJLog', 'ESLog', 'EMLog', 'JrLog', 'Log',
        'SrLog', 'BArch', 'MArch', 'ARBRIBA', 'RIAS', 'RIAI', 'RSAW', 'MB', 'BM', 'BS', 'BCh',
        'BChir', 'MRCS', 'FRCS', 'MS', 'MCh.', 'MRCP', 'FRCP', 'MRCPCHFRCPCH', 'MRCPath', 'MFPM',
        'FFPM', 'BDS', 'MRCPsych', 'FRCPsych', 'MRCOG', 'FRCOG', 'MCEM', 'FCEM', 'FRCAFFPMRCA',
        'MRCGP', 'FRCGP', 'BSc', 'MScChiro', 'MChiro', 'MSc', 'DC', 'LFHOM', 'MFHOM', 'FFHOM',
        'FADO', 'FBDOFCOptom', 'MCOptom', 'MOst', 'DPT', 'MCSP', 'FCSP.', 'SROT', 'MSCR', 'FSCR.',
        'CPhT', 'RN', 'VN', 'RVN', 'BVScBVetMed', 'VetMB', 'BVM&S', 'MRCVS', 'FRCVS', 'FAWM',
        'PGCAP', 'PGCHE', 'PGCE', 'PGDE', 'BEd', 'NPQH', 'QTSCSci', 'CSciTeach', 'RSci', 'RSciTech',
        'CEng', 'IEng', 'EngTech', 'ICTTech', 'DEM', 'MM', 'CMarEngCMarSci', 'CMarTech', 'IMarEng',
        'MarEngTech', 'RGN', 'SRN', 'RMN', 'RSCN', 'SEN', 'EN', 'RNMH', 'RN', 'RM', 'RN1RNA', 'RN2',
        'RN3', 'RNMH', 'RN4', 'RN5', 'RNLD', 'RN6', 'RN8', 'RNC', 'RN7', 'RN9', 'RHV', 'RSN', 'ROH',
        'RFHN', 'SPANSPMH', 'SPCN', 'SPLD', 'SPHP', 'SCHM', 'SCLD', 'SPCC', 'SPDN', 'V100', 'V200',
        'V300', 'LPE', 'MS'
    ];

    /**
     * Default options; static, so shared by every caller.
     *
     * @var array
     */
    private static $options = [
        'lazy'        => true,
        'irish'       => true,
        'spanish'     => false,
        'roman'       => true,
        'hebrew'      => true,
        'postnominal' => true,
    ];

    /**
     * Formatter constructor.
     *
     * Merges the given options into the shared static options.
     *
     * @param array $options
     */
    public function __construct($options = [])
    {
        self::setOptions($options);
    }

    /**
     * Global options setter.
     *
     * Keys present in $options override the current values; all other keys
     * keep their current value.
     *
     * @param array $options
     */
    public static function setOptions($options)
    {
        self::$options = array_merge(self::$options, $options);
    }

    /**
     * Main function for NameCase.
     *
     * Note that $options are merged into the shared static options, so they
     * stay in effect for later calls as well.
     *
     * @param string $name
     * @param array  $options
     *
     * @return string
     */
    public static function nameCase($name = '', array $options = []): string
    {
        if ($name == '') {
            // The cast turns null into '' instead of letting it hit the
            // `string` return type and raise a TypeError.
            return (string) $name;
        }

        self::$options = array_merge(self::$options, $options);

        // Do not do anything if string is mixed and lazy option is true.
        if (self::$options['lazy'] && self::skipMixed($name)) {
            return $name;
        }

        // Capitalize
        $name = self::capitalize($name);

        // General fixes
        $replacements = self::REPLACEMENTS;
        if (! self::$options['spanish']) {
            $replacements = array_merge($replacements, self::SPANISH);
        }

        if (self::$options['hebrew']) {
            $replacements = array_merge($replacements, self::HEBREW);
        }

        foreach ($replacements as $pattern => $replacement) {
            $name = mb_ereg_replace($pattern, $replacement, $name);
        }

        return self::processOptions($name);
    }

    /**
     * Apply the passes controlled by the 'roman', 'spanish' and 'postnominal' options.
     *
     * @param string $name
     *
     * @return string
     */
    private static function processOptions(string $name): string
    {
        if (self::$options['roman']) {
            $name = self::updateRoman($name);
        }

        if (self::$options['spanish']) {
            $name = self::fixConjunction($name);
        }

        if (self::$options['postnominal']) {
            $name = self::fixPostNominal($name);
        }

        return $name;
    }

    /**
     * Capitalize first letters.
     *
     * Lowercases the whole string, uppercases the first character of every
     * word, lowercases a single letter following an apostrophe ('s) and then
     * runs the Irish Mac/Mc pass.
     *
     * @param string $name
     *
     * @return string
     */
    private static function capitalize(string $name): string
    {
        $name = mb_strtolower($name);

        $name = mb_ereg_replace_callback('\b\w', static function ($matches) {
            return mb_strtoupper($matches[0]);
        }, $name);

        // Lowercase 's
        $name = mb_ereg_replace_callback('\'\w\b', static function ($matches) {
            return mb_strtolower($matches[0]);
        }, $name);

        return self::updateIrish($name);
    }

    /**
     * Skip if string is mixed case.
     *
     * A name is skipped only when it starts with a non-lowercase character and
     * is neither entirely lowercase nor entirely uppercase.
     *
     * @param string $name
     *
     * @return bool
     */
    private static function skipMixed(string $name): bool
    {
        // mb_substr, not $name[0]: the latter yields a single byte and breaks
        // on names that start with a multibyte letter.
        $firstLetter = mb_substr($name, 0, 1);
        $firstLetterLower = $firstLetter === mb_strtolower($firstLetter);
        $allLowerOrUpper = (mb_strtolower($name) === $name || mb_strtoupper($name) === $name);

        return ! ($firstLetterLower || $allLowerOrUpper);
    }

    /**
     * Update for Irish names.
     *
     * Does nothing when the 'irish' option is off.
     *
     * @param string $name
     *
     * @return string
     */
    private static function updateIrish(string $name): string
    {
        if (! self::$options['irish']) {
            return $name;
        }

        if (
            mb_ereg_match('.*?\bMac[A-Za-z]{2,}[^aciozj]\b', $name) ||
            mb_ereg_match('.*?\bMc', $name)
        ) {
            $name = self::updateMac($name);
        }

        return mb_ereg_replace('Macmurdo', 'MacMurdo', $name);
    }

    /**
     * Fix roman numeral names.
     *
     * Uppercases every match of ROMAN_REGEX.
     *
     * @param string $name
     *
     * @return string
     */
    private static function updateRoman(string $name): string
    {
        return mb_ereg_replace_callback(self::ROMAN_REGEX, static function ($matches) {
            return mb_strtoupper($matches[0]);
        }, $name);
    }

    /**
     * Updates irish Mac & Mc.
     *
     * Uppercases the letter that follows a leading Mac/Mc, then applies the
     * EXCEPTIONS table.
     *
     * @param string $name
     *
     * @return string
     */
    private static function updateMac(string $name): string
    {
        $name = mb_ereg_replace_callback('\b(Ma?c)([A-Za-z]+)', static function ($matches) {
            return $matches[1] . mb_strtoupper(mb_substr($matches[2], 0, 1)) . mb_substr($matches[2], 1);
        }, $name);

        // Now fix "Mac" exceptions
        foreach (self::EXCEPTIONS as $pattern => $replacement) {
            $name = mb_ereg_replace($pattern, $replacement, $name);
        }

        return $name;
    }

    /**
     * Fix Spanish conjunctions.
     *
     * Lowercases each stand-alone word listed in CONJUNCTIONS.
     *
     * @param string $name
     *
     * @return string
     */
    private static function fixConjunction(string $name): string
    {
        foreach (self::CONJUNCTIONS as $conjunction) {
            $name = mb_ereg_replace('\b' . $conjunction . '\b', mb_strtolower($conjunction), $name);
        }

        return $name;
    }

    /**
     * Fix post-nominal letter cases.
     *
     * Every stand-alone, case-insensitive occurrence of a POST_NOMINALS entry
     * is rewritten to the entry's exact spelling.
     *
     * @param string $name
     *
     * @return string
     */
    private static function fixPostNominal(string $name): string
    {
        foreach (self::POST_NOMINALS as $postnominal) {
            // Entries are literals, so quote them: an unescaped '.' (as in
            // 'MCh.') would match any character. The look-arounds stand in for
            // \b, which cannot match after an entry ending in punctuation.
            $pattern = '(?<!\w)' . preg_quote($postnominal) . '(?!\w)';
            $name = mb_ereg_replace($pattern, $postnominal, $name, 'ix');
        }

        return $name;
    }
}
314