convert_accent_characters.php ➔ convert_accent_characters()   D
last analyzed

Complexity

Conditions 10
Paths 6

Size

Total Lines 360
Code Lines 335

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 10
eloc 335
nc 6
nop 2
dl 0
loc 360
rs 4.8196
c 0
b 0
f 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

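A commonly applied refactoring here is Extract Method: move a cohesive part of the body (for example, a commented section) into its own well-named method.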

1
<?php
2
3
namespace Rap2hpoutre\ConvertAccentCharacters;
4
5
/**
 * Converts accented UTF-8 characters in a string to their closest ASCII equivalents.
 *
 * The input is returned untouched when it contains no byte in the 0x80-0xFF
 * range. Otherwise every key of the transliteration table below is replaced by
 * its value in a single strtr() pass. A few locales override individual
 * entries (German umlauts, Danish vowels, Catalan "l·l", Serbian/Bosnian "Đ").
 *
 * Extracted from WordPress code (roughly copy/pasted), thanks (and sorry) to authors.
 *
 * @see https://github.com/WordPress/WordPress/blob/master/wp-includes/formatting.php
 * @param string      $string Text to transliterate (expected to be UTF-8).
 * @param string|null $locale Optional locale code (e.g. 'de_DE', 'da_DK', 'ca',
 *                            'sr_RS', 'bs_BA') selecting locale-specific rules.
 *                            Any other value uses the default table.
 * @return string The text with every mapped character replaced.
 */
function convert_accent_characters($string, $locale = null)
{
    // Fast path: nothing outside 7-bit ASCII, so there is nothing to replace.
    if (!preg_match('/[\x80-\xff]/', $string)) {
        return $string;
    }

    $chars = [
        // Decompositions for Latin-1 Supplement
        'ª' => 'a',
        'º' => 'o',
        'À' => 'A',
        'Á' => 'A',
        'Â' => 'A',
        'Ã' => 'A',
        'Ä' => 'A',
        'Å' => 'A',
        'Æ' => 'AE',
        'Ç' => 'C',
        'È' => 'E',
        'É' => 'E',
        'Ê' => 'E',
        'Ë' => 'E',
        'Ì' => 'I',
        'Í' => 'I',
        'Î' => 'I',
        'Ï' => 'I',
        'Ð' => 'D',
        'Ñ' => 'N',
        'Ò' => 'O',
        'Ó' => 'O',
        'Ô' => 'O',
        'Õ' => 'O',
        'Ö' => 'O',
        'Ù' => 'U',
        'Ú' => 'U',
        'Û' => 'U',
        'Ü' => 'U',
        'Ý' => 'Y',
        'Þ' => 'TH',
        'ß' => 's',
        'à' => 'a',
        'á' => 'a',
        'â' => 'a',
        'ã' => 'a',
        'ä' => 'a',
        'å' => 'a',
        'æ' => 'ae',
        'ç' => 'c',
        'è' => 'e',
        'é' => 'e',
        'ê' => 'e',
        'ë' => 'e',
        'ì' => 'i',
        'í' => 'i',
        'î' => 'i',
        'ï' => 'i',
        'ð' => 'd',
        'ñ' => 'n',
        'ò' => 'o',
        'ó' => 'o',
        'ô' => 'o',
        'õ' => 'o',
        'ö' => 'o',
        'ø' => 'o',
        'ù' => 'u',
        'ú' => 'u',
        'û' => 'u',
        'ü' => 'u',
        'ý' => 'y',
        'þ' => 'th',
        'ÿ' => 'y',
        'Ø' => 'O',
        // Decompositions for Latin Extended-A
        'Ā' => 'A',
        'ā' => 'a',
        'Ă' => 'A',
        'ă' => 'a',
        'Ą' => 'A',
        'ą' => 'a',
        'Ć' => 'C',
        'ć' => 'c',
        'Ĉ' => 'C',
        'ĉ' => 'c',
        'Ċ' => 'C',
        'ċ' => 'c',
        'Č' => 'C',
        'č' => 'c',
        'Ď' => 'D',
        'ď' => 'd',
        'Đ' => 'D',
        'đ' => 'd',
        'Ē' => 'E',
        'ē' => 'e',
        'Ĕ' => 'E',
        'ĕ' => 'e',
        'Ė' => 'E',
        'ė' => 'e',
        'Ę' => 'E',
        'ę' => 'e',
        'Ě' => 'E',
        'ě' => 'e',
        'Ĝ' => 'G',
        'ĝ' => 'g',
        'Ğ' => 'G',
        'ğ' => 'g',
        'Ġ' => 'G',
        'ġ' => 'g',
        'Ģ' => 'G',
        'ģ' => 'g',
        'Ĥ' => 'H',
        'ĥ' => 'h',
        'Ħ' => 'H',
        'ħ' => 'h',
        'Ĩ' => 'I',
        'ĩ' => 'i',
        'Ī' => 'I',
        'ī' => 'i',
        'Ĭ' => 'I',
        'ĭ' => 'i',
        'Į' => 'I',
        'į' => 'i',
        'İ' => 'I',
        'ı' => 'i',
        // U+0132 / U+0133 (IJ ligatures). Written as UTF-8 byte escapes so the
        // keys cannot be silently normalised into plain ASCII "IJ"/"ij", which
        // would turn these entries into no-ops.
        "\xC4\xB2" => 'IJ',
        "\xC4\xB3" => 'ij',
        'Ĵ' => 'J',
        'ĵ' => 'j',
        'Ķ' => 'K',
        'ķ' => 'k',
        'ĸ' => 'k',
        'Ĺ' => 'L',
        'ĺ' => 'l',
        'Ļ' => 'L',
        'ļ' => 'l',
        'Ľ' => 'L',
        'ľ' => 'l',
        'Ŀ' => 'L',
        'ŀ' => 'l',
        'Ł' => 'L',
        'ł' => 'l',
        'Ń' => 'N',
        'ń' => 'n',
        'Ņ' => 'N',
        'ņ' => 'n',
        'Ň' => 'N',
        'ň' => 'n',
        // U+0149 (n preceded by apostrophe) as a byte escape, plus the
        // decomposed spelling (modifier apostrophe followed by "n") that the
        // table previously carried, kept for backward compatibility.
        "\xC5\x89" => 'n',
        'ʼn' => 'n',
        'Ŋ' => 'N',
        'ŋ' => 'n',
        'Ō' => 'O',
        'ō' => 'o',
        'Ŏ' => 'O',
        'ŏ' => 'o',
        'Ő' => 'O',
        'ő' => 'o',
        'Œ' => 'OE',
        'œ' => 'oe',
        'Ŕ' => 'R',
        'ŕ' => 'r',
        'Ŗ' => 'R',
        'ŗ' => 'r',
        'Ř' => 'R',
        'ř' => 'r',
        'Ś' => 'S',
        'ś' => 's',
        'Ŝ' => 'S',
        'ŝ' => 's',
        'Ş' => 'S',
        'ş' => 's',
        'Š' => 'S',
        'š' => 's',
        'Ţ' => 'T',
        'ţ' => 't',
        'Ť' => 'T',
        'ť' => 't',
        'Ŧ' => 'T',
        'ŧ' => 't',
        'Ũ' => 'U',
        'ũ' => 'u',
        'Ū' => 'U',
        'ū' => 'u',
        'Ŭ' => 'U',
        'ŭ' => 'u',
        'Ů' => 'U',
        'ů' => 'u',
        'Ű' => 'U',
        'ű' => 'u',
        'Ų' => 'U',
        'ų' => 'u',
        'Ŵ' => 'W',
        'ŵ' => 'w',
        'Ŷ' => 'Y',
        'ŷ' => 'y',
        'Ÿ' => 'Y',
        'Ź' => 'Z',
        'ź' => 'z',
        'Ż' => 'Z',
        'ż' => 'z',
        'Ž' => 'Z',
        'ž' => 'z',
        'ſ' => 's',
        // Decompositions for Latin Extended-B
        'Ș' => 'S',
        'ș' => 's',
        'Ț' => 'T',
        'ț' => 't',
        // Euro Sign
        '€' => 'E',
        // GBP (Pound) Sign: removed from the output (mapped to the empty string)
        '£' => '',
        // Vowels with diacritic (Vietnamese)
        // unmarked
        'Ơ' => 'O',
        'ơ' => 'o',
        'Ư' => 'U',
        'ư' => 'u',
        // grave accent
        'Ầ' => 'A',
        'ầ' => 'a',
        'Ằ' => 'A',
        'ằ' => 'a',
        'Ề' => 'E',
        'ề' => 'e',
        'Ồ' => 'O',
        'ồ' => 'o',
        'Ờ' => 'O',
        'ờ' => 'o',
        'Ừ' => 'U',
        'ừ' => 'u',
        'Ỳ' => 'Y',
        'ỳ' => 'y',
        // hook
        'Ả' => 'A',
        'ả' => 'a',
        'Ẩ' => 'A',
        'ẩ' => 'a',
        'Ẳ' => 'A',
        'ẳ' => 'a',
        'Ẻ' => 'E',
        'ẻ' => 'e',
        'Ể' => 'E',
        'ể' => 'e',
        'Ỉ' => 'I',
        'ỉ' => 'i',
        'Ỏ' => 'O',
        'ỏ' => 'o',
        'Ổ' => 'O',
        'ổ' => 'o',
        'Ở' => 'O',
        'ở' => 'o',
        'Ủ' => 'U',
        'ủ' => 'u',
        'Ử' => 'U',
        'ử' => 'u',
        'Ỷ' => 'Y',
        'ỷ' => 'y',
        // tilde
        'Ẫ' => 'A',
        'ẫ' => 'a',
        'Ẵ' => 'A',
        'ẵ' => 'a',
        'Ẽ' => 'E',
        'ẽ' => 'e',
        'Ễ' => 'E',
        'ễ' => 'e',
        'Ỗ' => 'O',
        'ỗ' => 'o',
        'Ỡ' => 'O',
        'ỡ' => 'o',
        'Ữ' => 'U',
        'ữ' => 'u',
        'Ỹ' => 'Y',
        'ỹ' => 'y',
        // acute accent
        'Ấ' => 'A',
        'ấ' => 'a',
        'Ắ' => 'A',
        'ắ' => 'a',
        'Ế' => 'E',
        'ế' => 'e',
        'Ố' => 'O',
        'ố' => 'o',
        'Ớ' => 'O',
        'ớ' => 'o',
        'Ứ' => 'U',
        'ứ' => 'u',
        // dot below
        'Ạ' => 'A',
        'ạ' => 'a',
        'Ậ' => 'A',
        'ậ' => 'a',
        'Ặ' => 'A',
        'ặ' => 'a',
        'Ẹ' => 'E',
        'ẹ' => 'e',
        'Ệ' => 'E',
        'ệ' => 'e',
        'Ị' => 'I',
        'ị' => 'i',
        'Ọ' => 'O',
        'ọ' => 'o',
        'Ộ' => 'O',
        'ộ' => 'o',
        'Ợ' => 'O',
        'ợ' => 'o',
        'Ụ' => 'U',
        'ụ' => 'u',
        'Ự' => 'U',
        'ự' => 'u',
        'Ỵ' => 'Y',
        'ỵ' => 'y',
        // Vowels with diacritic (Chinese, Hanyu Pinyin)
        'ɑ' => 'a',
        // macron
        'Ǖ' => 'U',
        'ǖ' => 'u',
        // acute accent
        'Ǘ' => 'U',
        'ǘ' => 'u',
        // caron
        'Ǎ' => 'A',
        'ǎ' => 'a',
        'Ǐ' => 'I',
        'ǐ' => 'i',
        'Ǒ' => 'O',
        'ǒ' => 'o',
        'Ǔ' => 'U',
        'ǔ' => 'u',
        'Ǚ' => 'U',
        'ǚ' => 'u',
        // grave accent
        'Ǜ' => 'U',
        'ǜ' => 'u',
    ];

    // Locale-specific overrides. All comparisons are strict so that a
    // non-string $locale (e.g. true or 0) can never select a locale by
    // accident through PHP's loose-comparison rules.
    $germanLocales = ['de_DE', 'de_DE_formal', 'de_CH', 'de_CH_informal'];

    if (in_array($locale, $germanLocales, true)) {
        $chars['Ä'] = 'Ae';
        $chars['ä'] = 'ae';
        $chars['Ö'] = 'Oe';
        $chars['ö'] = 'oe';
        $chars['Ü'] = 'Ue';
        $chars['ü'] = 'ue';
        $chars['ß'] = 'ss';
    } elseif ('da_DK' === $locale) {
        $chars['Æ'] = 'Ae';
        $chars['æ'] = 'ae';
        $chars['Ø'] = 'Oe';
        $chars['ø'] = 'oe';
        $chars['Å'] = 'Aa';
        $chars['å'] = 'aa';
    } elseif ('ca' === $locale) {
        // Multi-character key: the "l", middle dot, "l" sequence collapses to "ll".
        $chars['l·l'] = 'll';
    } elseif ('sr_RS' === $locale || 'bs_BA' === $locale) {
        $chars['Đ'] = 'DJ';
        $chars['đ'] = 'dj';
    }

    // With an array argument strtr() tries the longest keys first and never
    // re-scans text it has already replaced.
    return strtr($string, $chars);
}