Completed
Push — master ( 1b12be...a9625e )
by Karsten
02:00
created

WithoutAccents::getUtf8Map()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 8
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 6

Importance

Changes 0
Metric Value
dl 0
loc 8
ccs 0
cts 4
cp 0
rs 9.4285
c 0
b 0
f 0
cc 2
eloc 4
nc 2
nop 0
crap 6
1
<?php
2
/**
3
 * Created by gerk on 30.11.17 05:56
4
 */
5
6
namespace PeekAndPoke\Component\Psi\Psi\Str;
7
8
use PeekAndPoke\Component\Psi\UnaryFunction;
9
10
/**
11
 * Replaces all special characters with their "normal" form
12
 *
13
 * Special thanks to WORDPRESS and their authors for this... things were taken from there.
14
 *
15
 * @author Karsten J. Gerber <[email protected]>
16
 */
17
class WithoutAccents implements UnaryFunction
{
    /**
     * Lazily built UTF-8 replacement table: UTF-8 byte sequence => ASCII replacement.
     *
     * @var array<string,string>|null
     */
    private static $utf8Map;

    /**
     * Lazily built list of single-byte characters to replace, used as the "from" argument of strtr().
     *
     * @var string|null
     */
    private static $isoIn;

    /**
     * Replacement characters, position-aligned with $isoIn ("to" argument of strtr()).
     *
     * @var string|null
     */
    private static $isoOut;

    /**
     * Replaces accented / special characters in the input with plain ASCII equivalents.
     *
     * @param mixed $input
     *
     * @return mixed|null Null for non-scalar input (including null itself). The unchanged input
     *                    when it contains no byte in the range 0x80-0xFF. Otherwise the input
     *                    string with the known special characters replaced.
     */
    public function __invoke($input)
    {
        // is_scalar(null) is false, so null is rejected here as well
        if (! is_scalar($input)) {
            return null;
        }

        // Pure 7-bit input has nothing to replace
        if (! preg_match('/[\x80-\xff]/', $input)) {
            return $input;
        }

        if ($this->seemsUtf8($input)) {
            return $this->removeAccentsUtf8($input);
        }

        // Not valid UTF-8: fall back to the single-byte replacement table
        return $this->removeAccentsIso($input);
    }

    /**
     * Returns the UTF-8 replacement table, building it on first use.
     *
     * @return array<string,string> UTF-8 byte sequence => ASCII replacement
     */
    public static function getUtf8Map()
    {
        if (self::$utf8Map === null) {
            self::setupUtf8Map();
        }

        return self::$utf8Map;
    }

    /**
     * Checks whether the given string is valid UTF-8.
     *
     * @param string $string
     *
     * @return bool
     */
    private function seemsUtf8($string)
    {
        // In strict mode mb_detect_encoding() yields the encoding name or false; normalise to a real boolean
        return mb_detect_encoding($string, 'UTF-8', true) !== false;
    }

    /**
     * Replaces all multi-byte sequences known to the UTF-8 map.
     *
     * @param string $input
     *
     * @return string
     */
    private function removeAccentsUtf8($input)
    {
        // strtr() with an array always tries the longest key first, so 3-byte keys win over 2-byte ones
        return strtr($input, self::getUtf8Map());
    }

    /**
     * Replaces special characters of a single-byte encoded string.
     *
     * @param string $string
     *
     * @return string
     */
    private function removeAccentsIso($string)
    {
        if (self::$isoIn === null) {
            self::setupIsoMap();
        }

        // First pass: byte-for-byte replacement (both arguments are strings of equal length)
        $string = strtr($string, self::$isoIn, self::$isoOut);

        // Second pass: characters that expand to two ASCII letters
        $doubleIn  = [chr(140), chr(156), chr(198), chr(208), chr(222), chr(223), chr(230), chr(240), chr(254)];
        $doubleOut = ['OE', 'oe', 'AE', 'DH', 'TH', 'ss', 'ae', 'dh', 'th'];

        return str_replace($doubleIn, $doubleOut, $string);
    }

    /**
     * Builds the single-byte replacement strings.
     *
     * Both strings must have the same length (65 bytes): the n-th byte of $isoIn is replaced
     * by the n-th byte of $isoOut.
     */
    private static function setupIsoMap()
    {
        self::$isoIn =
            chr(128) . chr(131) . chr(138) . chr(142) . chr(154) . chr(158)
            . chr(159) . chr(162) . chr(165) . chr(181) . chr(192) . chr(193) . chr(194)
            . chr(195) . chr(196) . chr(197) . chr(199) . chr(200) . chr(201) . chr(202)
            . chr(203) . chr(204) . chr(205) . chr(206) . chr(207) . chr(209) . chr(210)
            . chr(211) . chr(212) . chr(213) . chr(214) . chr(216) . chr(217) . chr(218)
            . chr(219) . chr(220) . chr(221) . chr(224) . chr(225) . chr(226) . chr(227)
            . chr(228) . chr(229) . chr(231) . chr(232) . chr(233) . chr(234) . chr(235)
            . chr(236) . chr(237) . chr(238) . chr(239) . chr(241) . chr(242) . chr(243)
            . chr(244) . chr(245) . chr(246) . chr(248) . chr(249) . chr(250) . chr(251)
            . chr(252) . chr(253) . chr(255);

        self::$isoOut = 'EfSZszYcYuAAAAAACEEEEIIIINOOOOOOUUUUYaaaaaaceeeeiiiinoooooouuuuyy';
    }

    /**
     * Builds the UTF-8 replacement table.
     *
     * Keys are raw UTF-8 byte sequences, values their ASCII replacement. Every key appears
     * exactly once. The German umlauts and the sharp s use their German transliteration
     * (Ae, Oe, Ue, ae, oe, ue, ss).
     */
    private static function setupUtf8Map()
    {
        self::$utf8Map = [
            // Decompositions for Latin-1 Supplement
            chr(194) . chr(170)            => 'a',
            chr(194) . chr(186)            => 'o',
            chr(195) . chr(128)            => 'A',
            chr(195) . chr(129)            => 'A',
            chr(195) . chr(130)            => 'A',
            chr(195) . chr(131)            => 'A',
            chr(195) . chr(132)            => 'Ae', // German umlaut A
            chr(195) . chr(133)            => 'A',
            chr(195) . chr(134)            => 'AE',
            chr(195) . chr(135)            => 'C',
            chr(195) . chr(136)            => 'E',
            chr(195) . chr(137)            => 'E',
            chr(195) . chr(138)            => 'E',
            chr(195) . chr(139)            => 'E',
            chr(195) . chr(140)            => 'I',
            chr(195) . chr(141)            => 'I',
            chr(195) . chr(142)            => 'I',
            chr(195) . chr(143)            => 'I',
            chr(195) . chr(144)            => 'D',
            chr(195) . chr(145)            => 'N',
            chr(195) . chr(146)            => 'O',
            chr(195) . chr(147)            => 'O',
            chr(195) . chr(148)            => 'O',
            chr(195) . chr(149)            => 'O',
            chr(195) . chr(150)            => 'Oe', // German umlaut O
            chr(195) . chr(153)            => 'U',
            chr(195) . chr(154)            => 'U',
            chr(195) . chr(155)            => 'U',
            chr(195) . chr(156)            => 'Ue', // German umlaut U
            chr(195) . chr(157)            => 'Y',
            chr(195) . chr(158)            => 'TH',
            chr(195) . chr(159)            => 'ss', // German sharp s
            chr(195) . chr(160)            => 'a',
            chr(195) . chr(161)            => 'a',
            chr(195) . chr(162)            => 'a',
            chr(195) . chr(163)            => 'a',
            chr(195) . chr(164)            => 'ae', // German umlaut a
            chr(195) . chr(165)            => 'a',
            chr(195) . chr(166)            => 'ae',
            chr(195) . chr(167)            => 'c',
            chr(195) . chr(168)            => 'e',
            chr(195) . chr(169)            => 'e',
            chr(195) . chr(170)            => 'e',
            chr(195) . chr(171)            => 'e',
            chr(195) . chr(172)            => 'i',
            chr(195) . chr(173)            => 'i',
            chr(195) . chr(174)            => 'i',
            chr(195) . chr(175)            => 'i',
            chr(195) . chr(176)            => 'd',
            chr(195) . chr(177)            => 'n',
            chr(195) . chr(178)            => 'o',
            chr(195) . chr(179)            => 'o',
            chr(195) . chr(180)            => 'o',
            chr(195) . chr(181)            => 'o',
            chr(195) . chr(182)            => 'oe', // German umlaut o
            chr(195) . chr(184)            => 'o',
            chr(195) . chr(185)            => 'u',
            chr(195) . chr(186)            => 'u',
            chr(195) . chr(187)            => 'u',
            chr(195) . chr(188)            => 'ue', // German umlaut u
            chr(195) . chr(189)            => 'y',
            chr(195) . chr(190)            => 'th',
            chr(195) . chr(191)            => 'y',
            chr(195) . chr(152)            => 'O',
            // Decompositions for Latin Extended-A
            chr(196) . chr(128)            => 'A',
            chr(196) . chr(129)            => 'a',
            chr(196) . chr(130)            => 'A',
            chr(196) . chr(131)            => 'a',
            chr(196) . chr(132)            => 'A',
            chr(196) . chr(133)            => 'a',
            chr(196) . chr(134)            => 'C',
            chr(196) . chr(135)            => 'c',
            chr(196) . chr(136)            => 'C',
            chr(196) . chr(137)            => 'c',
            chr(196) . chr(138)            => 'C',
            chr(196) . chr(139)            => 'c',
            chr(196) . chr(140)            => 'C',
            chr(196) . chr(141)            => 'c',
            chr(196) . chr(142)            => 'D',
            chr(196) . chr(143)            => 'd',
            chr(196) . chr(144)            => 'D',
            chr(196) . chr(145)            => 'd',
            chr(196) . chr(146)            => 'E',
            chr(196) . chr(147)            => 'e',
            chr(196) . chr(148)            => 'E',
            chr(196) . chr(149)            => 'e',
            chr(196) . chr(150)            => 'E',
            chr(196) . chr(151)            => 'e',
            chr(196) . chr(152)            => 'E',
            chr(196) . chr(153)            => 'e',
            chr(196) . chr(154)            => 'E',
            chr(196) . chr(155)            => 'e',
            chr(196) . chr(156)            => 'G',
            chr(196) . chr(157)            => 'g',
            chr(196) . chr(158)            => 'G',
            chr(196) . chr(159)            => 'g',
            chr(196) . chr(160)            => 'G',
            chr(196) . chr(161)            => 'g',
            chr(196) . chr(162)            => 'G',
            chr(196) . chr(163)            => 'g',
            chr(196) . chr(164)            => 'H',
            chr(196) . chr(165)            => 'h',
            chr(196) . chr(166)            => 'H',
            chr(196) . chr(167)            => 'h',
            chr(196) . chr(168)            => 'I',
            chr(196) . chr(169)            => 'i',
            chr(196) . chr(170)            => 'I',
            chr(196) . chr(171)            => 'i',
            chr(196) . chr(172)            => 'I',
            chr(196) . chr(173)            => 'i',
            chr(196) . chr(174)            => 'I',
            chr(196) . chr(175)            => 'i',
            chr(196) . chr(176)            => 'I',
            chr(196) . chr(177)            => 'i',
            chr(196) . chr(178)            => 'IJ',
            chr(196) . chr(179)            => 'ij',
            chr(196) . chr(180)            => 'J',
            chr(196) . chr(181)            => 'j',
            chr(196) . chr(182)            => 'K',
            chr(196) . chr(183)            => 'k',
            chr(196) . chr(184)            => 'k',
            chr(196) . chr(185)            => 'L',
            chr(196) . chr(186)            => 'l',
            chr(196) . chr(187)            => 'L',
            chr(196) . chr(188)            => 'l',
            chr(196) . chr(189)            => 'L',
            chr(196) . chr(190)            => 'l',
            chr(196) . chr(191)            => 'L',
            chr(197) . chr(128)            => 'l',
            chr(197) . chr(129)            => 'L',
            chr(197) . chr(130)            => 'l',
            chr(197) . chr(131)            => 'N',
            chr(197) . chr(132)            => 'n',
            chr(197) . chr(133)            => 'N',
            chr(197) . chr(134)            => 'n',
            chr(197) . chr(135)            => 'N',
            chr(197) . chr(136)            => 'n',
            chr(197) . chr(137)            => 'n', // U+0149, small n preceded by apostrophe (lowercase)
            chr(197) . chr(138)            => 'N', // U+014A, capital eng
            chr(197) . chr(139)            => 'n', // U+014B, small eng
            chr(197) . chr(140)            => 'O',
            chr(197) . chr(141)            => 'o',
            chr(197) . chr(142)            => 'O',
            chr(197) . chr(143)            => 'o',
            chr(197) . chr(144)            => 'O',
            chr(197) . chr(145)            => 'o',
            chr(197) . chr(146)            => 'OE',
            chr(197) . chr(147)            => 'oe',
            chr(197) . chr(148)            => 'R',
            chr(197) . chr(149)            => 'r',
            chr(197) . chr(150)            => 'R',
            chr(197) . chr(151)            => 'r',
            chr(197) . chr(152)            => 'R',
            chr(197) . chr(153)            => 'r',
            chr(197) . chr(154)            => 'S',
            chr(197) . chr(155)            => 's',
            chr(197) . chr(156)            => 'S',
            chr(197) . chr(157)            => 's',
            chr(197) . chr(158)            => 'S',
            chr(197) . chr(159)            => 's',
            chr(197) . chr(160)            => 'S',
            chr(197) . chr(161)            => 's',
            chr(197) . chr(162)            => 'T',
            chr(197) . chr(163)            => 't',
            chr(197) . chr(164)            => 'T',
            chr(197) . chr(165)            => 't',
            chr(197) . chr(166)            => 'T',
            chr(197) . chr(167)            => 't',
            chr(197) . chr(168)            => 'U',
            chr(197) . chr(169)            => 'u',
            chr(197) . chr(170)            => 'U',
            chr(197) . chr(171)            => 'u',
            chr(197) . chr(172)            => 'U',
            chr(197) . chr(173)            => 'u',
            chr(197) . chr(174)            => 'U',
            chr(197) . chr(175)            => 'u',
            chr(197) . chr(176)            => 'U',
            chr(197) . chr(177)            => 'u',
            chr(197) . chr(178)            => 'U',
            chr(197) . chr(179)            => 'u',
            chr(197) . chr(180)            => 'W',
            chr(197) . chr(181)            => 'w',
            chr(197) . chr(182)            => 'Y',
            chr(197) . chr(183)            => 'y',
            chr(197) . chr(184)            => 'Y',
            chr(197) . chr(185)            => 'Z',
            chr(197) . chr(186)            => 'z',
            chr(197) . chr(187)            => 'Z',
            chr(197) . chr(188)            => 'z',
            chr(197) . chr(189)            => 'Z',
            chr(197) . chr(190)            => 'z',
            chr(197) . chr(191)            => 's',
            // Decompositions for Latin Extended-B
            chr(200) . chr(152)            => 'S',
            chr(200) . chr(153)            => 's',
            chr(200) . chr(154)            => 'T',
            chr(200) . chr(155)            => 't',
            // Euro Sign
            chr(226) . chr(130) . chr(172) => 'E',
            // GBP (Pound) Sign - removed without replacement
            chr(194) . chr(163)            => '',
            // Vowels with diacritic (Vietnamese)
            // unmarked
            chr(198) . chr(160)            => 'O',
            chr(198) . chr(161)            => 'o',
            chr(198) . chr(175)            => 'U',
            chr(198) . chr(176)            => 'u',
            // grave accent
            chr(225) . chr(186) . chr(166) => 'A',
            chr(225) . chr(186) . chr(167) => 'a',
            chr(225) . chr(186) . chr(176) => 'A',
            chr(225) . chr(186) . chr(177) => 'a',
            chr(225) . chr(187) . chr(128) => 'E',
            chr(225) . chr(187) . chr(129) => 'e',
            chr(225) . chr(187) . chr(146) => 'O',
            chr(225) . chr(187) . chr(147) => 'o',
            chr(225) . chr(187) . chr(156) => 'O',
            chr(225) . chr(187) . chr(157) => 'o',
            chr(225) . chr(187) . chr(170) => 'U',
            chr(225) . chr(187) . chr(171) => 'u',
            chr(225) . chr(187) . chr(178) => 'Y',
            chr(225) . chr(187) . chr(179) => 'y',
            // hook
            chr(225) . chr(186) . chr(162) => 'A',
            chr(225) . chr(186) . chr(163) => 'a',
            chr(225) . chr(186) . chr(168) => 'A',
            chr(225) . chr(186) . chr(169) => 'a',
            chr(225) . chr(186) . chr(178) => 'A',
            chr(225) . chr(186) . chr(179) => 'a',
            chr(225) . chr(186) . chr(186) => 'E',
            chr(225) . chr(186) . chr(187) => 'e',
            chr(225) . chr(187) . chr(130) => 'E',
            chr(225) . chr(187) . chr(131) => 'e',
            chr(225) . chr(187) . chr(136) => 'I',
            chr(225) . chr(187) . chr(137) => 'i',
            chr(225) . chr(187) . chr(142) => 'O',
            chr(225) . chr(187) . chr(143) => 'o',
            chr(225) . chr(187) . chr(148) => 'O',
            chr(225) . chr(187) . chr(149) => 'o',
            chr(225) . chr(187) . chr(158) => 'O',
            chr(225) . chr(187) . chr(159) => 'o',
            chr(225) . chr(187) . chr(166) => 'U',
            chr(225) . chr(187) . chr(167) => 'u',
            chr(225) . chr(187) . chr(172) => 'U',
            chr(225) . chr(187) . chr(173) => 'u',
            chr(225) . chr(187) . chr(182) => 'Y',
            chr(225) . chr(187) . chr(183) => 'y',
            // tilde
            chr(225) . chr(186) . chr(170) => 'A',
            chr(225) . chr(186) . chr(171) => 'a',
            chr(225) . chr(186) . chr(180) => 'A',
            chr(225) . chr(186) . chr(181) => 'a',
            chr(225) . chr(186) . chr(188) => 'E',
            chr(225) . chr(186) . chr(189) => 'e',
            chr(225) . chr(187) . chr(132) => 'E',
            chr(225) . chr(187) . chr(133) => 'e',
            chr(225) . chr(187) . chr(150) => 'O',
            chr(225) . chr(187) . chr(151) => 'o',
            chr(225) . chr(187) . chr(160) => 'O',
            chr(225) . chr(187) . chr(161) => 'o',
            chr(225) . chr(187) . chr(174) => 'U',
            chr(225) . chr(187) . chr(175) => 'u',
            chr(225) . chr(187) . chr(184) => 'Y',
            chr(225) . chr(187) . chr(185) => 'y',
            // acute accent
            chr(225) . chr(186) . chr(164) => 'A',
            chr(225) . chr(186) . chr(165) => 'a',
            chr(225) . chr(186) . chr(174) => 'A',
            chr(225) . chr(186) . chr(175) => 'a',
            chr(225) . chr(186) . chr(190) => 'E',
            chr(225) . chr(186) . chr(191) => 'e',
            chr(225) . chr(187) . chr(144) => 'O',
            chr(225) . chr(187) . chr(145) => 'o',
            chr(225) . chr(187) . chr(154) => 'O',
            chr(225) . chr(187) . chr(155) => 'o',
            chr(225) . chr(187) . chr(168) => 'U',
            chr(225) . chr(187) . chr(169) => 'u',
            // dot below
            chr(225) . chr(186) . chr(160) => 'A',
            chr(225) . chr(186) . chr(161) => 'a',
            chr(225) . chr(186) . chr(172) => 'A',
            chr(225) . chr(186) . chr(173) => 'a',
            chr(225) . chr(186) . chr(182) => 'A',
            chr(225) . chr(186) . chr(183) => 'a',
            chr(225) . chr(186) . chr(184) => 'E',
            chr(225) . chr(186) . chr(185) => 'e',
            chr(225) . chr(187) . chr(134) => 'E',
            chr(225) . chr(187) . chr(135) => 'e',
            chr(225) . chr(187) . chr(138) => 'I',
            chr(225) . chr(187) . chr(139) => 'i',
            chr(225) . chr(187) . chr(140) => 'O',
            chr(225) . chr(187) . chr(141) => 'o',
            chr(225) . chr(187) . chr(152) => 'O',
            chr(225) . chr(187) . chr(153) => 'o',
            chr(225) . chr(187) . chr(162) => 'O',
            chr(225) . chr(187) . chr(163) => 'o',
            chr(225) . chr(187) . chr(164) => 'U',
            chr(225) . chr(187) . chr(165) => 'u',
            chr(225) . chr(187) . chr(176) => 'U',
            chr(225) . chr(187) . chr(177) => 'u',
            chr(225) . chr(187) . chr(180) => 'Y',
            chr(225) . chr(187) . chr(181) => 'y',
            // Vowels with diacritic (Chinese, Hanyu Pinyin)
            chr(201) . chr(145)            => 'a',
            // macron
            chr(199) . chr(149)            => 'U',
            chr(199) . chr(150)            => 'u',
            // acute accent
            chr(199) . chr(151)            => 'U',
            chr(199) . chr(152)            => 'u',
            // caron
            chr(199) . chr(141)            => 'A',
            chr(199) . chr(142)            => 'a',
            chr(199) . chr(143)            => 'I',
            chr(199) . chr(144)            => 'i',
            chr(199) . chr(145)            => 'O',
            chr(199) . chr(146)            => 'o',
            chr(199) . chr(147)            => 'U',
            chr(199) . chr(148)            => 'u',
            chr(199) . chr(153)            => 'U',
            chr(199) . chr(154)            => 'u',
            // grave accent
            chr(199) . chr(155)            => 'U',
            chr(199) . chr(156)            => 'u',
        ];
    }
}
441