Completed
Push — master ( b1e4ae...36cbd2 )
by Yuri
14:58
created

Formatter::__construct()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
dl 0
loc 4
ccs 2
cts 2
cp 1
rs 10
c 0
b 0
f 0
cc 1
nc 1
nop 1
crap 1
1
<?php namespace Tamtamchik\NameCase;
2
3
/**
 * Class Formatter.
 *
 * Re-cases personal names: capitalizes each word, then applies Irish
 * (Mac/Mc), particle (van, von, de, ...), Roman-numeral, Spanish-conjunction
 * and post-nominal corrections, each controlled by an option flag.
 *
 * Options live in a static property shared by every caller, so options passed
 * to the constructor, to setOptions() or to nameCase() persist for all
 * subsequent calls.
 */
class Formatter
{
    // Irish exceptions: regex pattern => replacement, applied after the
    // letter following "Mac"/"Mc" has been capitalized by updateMac().
    private const EXCEPTIONS = [
        '\bMacEdo'     => 'Macedo',
        '\bMacEvicius' => 'Macevicius',
        '\bMacHado'    => 'Machado',
        '\bMacHar'     => 'Machar',
        '\bMacHin'     => 'Machin',
        '\bMacHlin'    => 'Machlin',
        '\bMacIas'     => 'Macias',
        '\bMacIulis'   => 'Maciulis',
        '\bMacKie'     => 'Mackie',
        '\bMacKle'     => 'Mackle',
        '\bMacKlin'    => 'Macklin',
        '\bMacKmin'    => 'Mackmin',
        '\bMacQuarie'  => 'Macquarie',
        '\bMacOmber'   => 'Macomber',
        '\bMacIn'      => 'Macin',
        '\bMacKintosh' => 'Mackintosh',
        '\bMacKen'     => 'Macken',
        '\bMacHen'     => 'Machen',
        '\bMacisaac'   => 'MacIsaac',
        '\bMacHiel'    => 'Machiel',
        '\bMacIol'     => 'Maciol',
        '\bMacKell'    => 'Mackell',
        '\bMacKlem'    => 'Macklem',
        '\bMacKrell'   => 'Mackrell',
        '\bMacLin'     => 'Maclin',
        '\bMacKey'     => 'Mackey',
        '\bMacKley'    => 'Mackley',
        '\bMacHell'    => 'Machell',
        '\bMacHon'     => 'Machon',
    ];

    // General replacements: regex pattern => replacement, always applied.
    private const REPLACEMENTS = [
        '\bAl(?=\s+\w)'         => 'al',        // al Arabic or forename Al.
        '\bAp\b'                => 'ap',        // ap Welsh.
        '\b(Bin|Binti|Binte)\b' => 'bin',       // bin, binti, binte Arabic.
        '\bDell([ae])\b'        => 'dell\1',    // della and delle Italian.
        '\bD([aeiou])\b'        => 'd\1',       // da, de, di Italian; du French; do Brasil.
        '\bD([ao]s)\b'          => 'd\1',       // das, dos Brasileiros.
        '\bDe([lrn])\b'         => 'de\1',      // del Italian; der/den Dutch/Flemish.
        '\bL([eo])\b'           => 'l\1',       // lo Italian; le French.
        // Trailing \b is required: without it every word that merely starts
        // with "Ter"/"Ten" (Terry, Tennyson, ...) was lowercased.
        '\bTe([rn])\b'          => 'te\1',      // ten, ter Dutch/Flemish.
        '\bVan(?=\s+\w)'        => 'van',       // van Dutch/Flemish or forename Van.
        '\bVon\b'               => 'von',       // von German.
    ];

    // Applied only when the 'spanish' option is OFF.
    private const SPANISH = [
        '\bEl\b' => 'el',        // el Greek or El Spanish.
        '\bLa\b' => 'la',        // la French or La Spanish.
    ];

    // Applied only when the 'hebrew' option is ON. Declared without a
    // visibility keyword originally, i.e. public; kept public for callers.
    public const HEBREW = [
        '\bBen(?=\s+\w)' => 'ben', // ben Hebrew or forename Ben.
        '\bBat(?=\s+\w)' => 'bat', // bat Hebrew or forename Bat.
    ];

    // Spanish conjunctions (lowercased when the 'spanish' option is ON).
    private const CONJUNCTIONS = ["Y", "E", "I"];

    // Roman letters regexp.
    private const ROMAN_REGEX = '\b((?:[Xx]{1,3}|[Xx][Ll]|[Ll][Xx]{0,3})?(?:[Ii]{1,3}|[Ii][VvXx]|[Vv][Ii]{0,3})?)\b';

    // Post nominal values, written in their canonical letter case.
    // NOTE(review): several entries (e.g. 'MBEIOM', 'MRCPCHFRCPCH') look like
    // two abbreviations fused together, and some are duplicated - confirm
    // against the list's source before changing them.
    private const POST_NOMINALS = [
        'VC', 'GC', 'KG', 'LG', 'KT', 'LT', 'KP', 'GCB', 'OM', 'GCSI', 'GCMG', 'GCIE', 'GCVO',
        'GBE', 'CH', 'KCB', 'DCB', 'KCSI', 'KCMG', 'DCMG', 'KCIE', 'KCVO', 'DCVO', 'KBE', 'DBE',
        'CB', 'CSI', 'CMG', 'CIE', 'CVO', 'CBE', 'DSO', 'LVO', 'OBE', 'ISO', 'MVO', 'MBEIOM', 'CGC',
        'RRC', 'DSC', 'MC', 'DFC', 'AFC', 'ARRC', 'OBI', 'DCM', 'CGM', 'GM', 'IDSM', 'DSM', 'MM',
        'DFM', 'AFM', 'SGM', 'IOMCPM', 'QGM', 'RVM', 'BEM', 'QPM', 'QFSM', 'QAM', 'CPM', 'MSM',
        'ERD', 'VD', 'TD', 'UD', 'ED', 'RD', 'VRD', 'AEPC', 'ADC', 'QHP', 'QHS', 'QHDS', 'QHNS',
        'QHC', 'SCJ', 'J', 'LJ', 'QS', 'SL', 'QC', 'KC', 'JP', 'DL', 'MP', 'MSP', 'MSYP', 'AM',
        'MLA', 'MEP', 'DBEnv', 'DConstMgt', 'DREst', 'EdD', 'DPhil', 'PhD', 'DLitt', 'DSocSci',
        'MD', 'EngD', 'DD', 'LLD', 'DProf', 'MA', 'MArch', 'MAnth', 'MSc', 'MMORSE', 'MMath',
        'MMathStat', 'MPharm', 'MPhil', 'MSc', 'MSci', 'MSt', 'MRes', 'MEng', 'MChem', 'MBiochem',
        'MSocSc', 'MMus', 'LLM', 'BCL', 'MPhys', 'MComp', 'MAcc', 'MFin', 'MBA', 'MPAMEd', 'MEP',
        'MEnt', 'MCGI', 'MGeol', 'MLitt', 'MEarthSc', 'MClinRes', 'BA', 'BSc', 'LLB', 'BEng',
        'MBChB', 'FdAFdSc', 'FdEng', 'PgDip', 'PgD', 'PgCert', 'PgC', 'PgCLTHE', 'AUH', 'AKC',
        'AUS', 'HNC', 'HNCert', 'HND', 'HNDip', 'DipHE', 'Dip', 'OND', 'CertHE', 'ACSM', 'MCSM',
        'DIC', 'AICSM', 'ARSM', 'ARCS', 'LLB', 'LLM', 'BCL', 'MJur', 'DPhil', 'PhD', 'LLD', 'DipLP',
        'FCILEx', 'GCILEx', 'ACILEx', 'CQSW', 'DipSW', 'BSW', 'MSW', 'FCILT', 'CMILT', 'MILT',
        'CPLCTP', 'CML', 'PLS', 'CTL', 'DLP', 'PLog', 'EJLog', 'ESLog', 'EMLog', 'JrLog', 'Log',
        'SrLog', 'BArch', 'MArch', 'ARBRIBA', 'RIAS', 'RIAI', 'RSAW', 'MB', 'BM', 'BS', 'BCh',
        'BChir', 'MRCS', 'FRCS', 'MS', 'MCh.', 'MRCP', 'FRCP', 'MRCPCHFRCPCH', 'MRCPath', 'MFPM',
        'FFPM', 'BDS', 'MRCPsych', 'FRCPsych', 'MRCOG', 'FRCOG', 'MCEM', 'FCEM', 'FRCAFFPMRCA',
        'MRCGP', 'FRCGP', 'BSc', 'MScChiro', 'MChiro', 'MSc', 'DC', 'LFHOM', 'MFHOM', 'FFHOM',
        'FADO', 'FBDOFCOptom', 'MCOptom', 'MOst', 'DPT', 'MCSP', 'FCSP.', 'SROT', 'MSCR', 'FSCR.',
        'CPhT', 'RN', 'VN', 'RVN', 'BVScBVetMed', 'VetMB', 'BVM&S', 'MRCVS', 'FRCVS', 'FAWM',
        'PGCAP', 'PGCHE', 'PGCE', 'PGDE', 'BEd', 'NPQH', 'QTSCSci', 'CSciTeach', 'RSci', 'RSciTech',
        'CEng', 'IEng', 'EngTech', 'ICTTech', 'DEM', 'MM', 'CMarEngCMarSci', 'CMarTech', 'IMarEng',
        'MarEngTech', 'RGN', 'SRN', 'RMN', 'RSCN', 'SEN', 'EN', 'RNMH', 'RN', 'RM', 'RN1RNA', 'RN2',
        'RN3', 'RNMH', 'RN4', 'RN5', 'RNLD', 'RN6', 'RN8', 'RNC', 'RN7', 'RN9', 'RHV', 'RSN', 'ROH',
        'RFHN', 'SPANSPMH', 'SPCN', 'SPLD', 'SPHP', 'SCHM', 'SCLD', 'SPCC', 'SPDN', 'V100', 'V200',
        'V300', 'LPE', 'MS'
    ];

    // Default options (shared, mutable state - see class docblock).
    private static $options = [
        'lazy'        => true,
        'irish'       => true,
        'spanish'     => false,
        'roman'       => true,
        'hebrew'      => true,
        'postnominal' => true,
    ];

    /**
     * Formatter constructor.
     *
     * Merges the given options into the shared static options.
     *
     * @param array $options Option name => bool overrides.
     */
    public function __construct($options = [])
    {
        self::setOptions($options);
    }

    /**
     * Global options setter.
     *
     * Later keys override the current values; unspecified keys are kept.
     *
     * @param array $options Option name => bool overrides.
     */
    public static function setOptions($options)
    {
        self::$options = array_merge(self::$options, $options);
    }

    /**
     * Main function for NameCase.
     *
     * The given options are merged into the shared static options before
     * processing, so they also apply to every later call.
     *
     * @param string $name    Name to re-case; null or '' yields ''.
     * @param array  $options Option name => bool overrides.
     *
     * @return string
     */
    public static function nameCase($name = '', array $options = []): string
    {
        // Returning the untouched value here would violate the string return
        // type (TypeError) when null is passed.
        if ($name === null || $name === '') {
            return '';
        }

        self::setOptions($options);

        // Do not do anything if string is mixed and lazy option is true.
        if (self::$options['lazy'] && self::skipMixed($name)) {
            return $name;
        }

        // Capitalize
        $name = self::capitalize($name);
        $name = self::updateIrish($name);

        // General fixes. El/La are lowercased only when NOT in Spanish mode.
        $replacements = self::REPLACEMENTS;
        if (! self::$options['spanish']) {
            $replacements = array_merge($replacements, self::SPANISH);
        }

        if (self::$options['hebrew']) {
            $replacements = array_merge($replacements, self::HEBREW);
        }

        foreach ($replacements as $pattern => $replacement) {
            $name = mb_ereg_replace($pattern, $replacement, $name);
        }

        if (self::$options['roman']) {
            $name = self::updateRoman($name);
        }

        if (self::$options['spanish']) {
            $name = self::fixConjunction($name);
        }

        if (self::$options['postnominal']) {
            $name = self::fixPostNominal($name);
        }

        return $name;
    }

    /**
     * Capitalize first letters.
     *
     * Lowercases the whole string, uppercases the first character of every
     * word, then re-lowercases a single letter following an apostrophe at the
     * end of a word (the "s" in "'s").
     *
     * @param string $name
     *
     * @return string
     */
    private static function capitalize(string $name): string
    {
        $name = mb_strtolower($name);

        $name = mb_ereg_replace_callback('\b\w', static function ($matches) {
            return mb_strtoupper($matches[0]);
        }, $name);

        // Lowercase 's
        $name = mb_ereg_replace_callback('\'\w\b', static function ($matches) {
            return mb_strtolower($matches[0]);
        }, $name);

        return $name;
    }

    /**
     * Skip if string is mixed case.
     *
     * A name is skipped only when it starts with a non-lowercase character
     * and is neither entirely lowercase nor entirely uppercase.
     *
     * @param string $name
     *
     * @return bool True when the name should be left untouched.
     */
    private static function skipMixed(string $name): bool
    {
        // Take the first CHARACTER, not the first byte: $name[0] splits
        // multi-byte letters such as "é" and breaks the comparison.
        $firstLetter = mb_substr($name, 0, 1);
        $firstLetterLower = $firstLetter == mb_strtolower($firstLetter);
        $allLowerOrUpper = (mb_strtolower($name) == $name || mb_strtoupper($name) == $name);

        return ! ($firstLetterLower || $allLowerOrUpper);
    }

    /**
     * Update for Irish names.
     *
     * Does nothing when the 'irish' option is off. Otherwise runs updateMac()
     * when the name contains a "Mc" word or a qualifying "Mac" word, and
     * always restores "MacMurdo".
     *
     * @param string $name
     *
     * @return string
     */
    private static function updateIrish(string $name): string
    {
        if (! self::$options['irish']) {
            return $name;
        }

        // The leading ".*?" lets the pattern find the word anywhere in $name.
        $hasMac = mb_ereg_match('.*?\bMac[A-Za-z]{2,}[^aciozj]\b', $name);
        $hasMc = mb_ereg_match('.*?\bMc', $name);

        if ($hasMac || $hasMc) {
            $name = self::updateMac($name);
        }

        return mb_ereg_replace('Macmurdo', 'MacMurdo', $name);
    }

    /**
     * Fix roman numeral names.
     *
     * Uppercases every match of ROMAN_REGEX.
     *
     * @param string $name
     *
     * @return string
     */
    private static function updateRoman(string $name): string
    {
        return mb_ereg_replace_callback(self::ROMAN_REGEX, static function ($matches) {
            return mb_strtoupper($matches[0]);
        }, $name);
    }

    /**
     * Updates irish Mac & Mc.
     *
     * Capitalizes the letter following a leading "Mac"/"Mc", then reverts the
     * known exceptions listed in EXCEPTIONS.
     *
     * @param string $name
     *
     * @return string
     */
    private static function updateMac(string $name): string
    {
        $name = mb_ereg_replace_callback('\b(Ma?c)([A-Za-z]+)', static function ($matches) {
            $prefix = $matches[1];
            $rest = $matches[2];

            return $prefix . mb_strtoupper(mb_substr($rest, 0, 1)) . mb_substr($rest, 1);
        }, $name);

        // Now fix "Mac" exceptions
        foreach (self::EXCEPTIONS as $pattern => $replacement) {
            $name = mb_ereg_replace($pattern, $replacement, $name);
        }

        return $name;
    }

    /**
     * Fix Spanish conjunctions.
     *
     * Lowercases each standalone conjunction listed in CONJUNCTIONS.
     *
     * @param string $name
     *
     * @return string
     */
    private static function fixConjunction(string $name): string
    {
        foreach (self::CONJUNCTIONS as $conjunction) {
            $name = mb_ereg_replace('\b' . $conjunction . '\b', mb_strtolower($conjunction), $name);
        }

        return $name;
    }

    /**
     * Fix post-nominal letter cases.
     *
     * Replaces a trailing post-nominal (matched case-insensitively at the end
     * of the string) with its canonical spelling from POST_NOMINALS.
     *
     * @param string $name
     *
     * @return string
     */
    private static function fixPostNominal(string $name): string
    {
        foreach (self::POST_NOMINALS as $postnominal) {
            // Escape regex metacharacters: entries such as 'MCh.' contain a
            // dot that would otherwise match (and overwrite) any character.
            $pattern = '\b' . preg_quote($postnominal) . '$';
            $name = mb_ereg_replace($pattern, $postnominal, $name, 'ix');
        }

        return $name;
    }
}
308