Transliteration::transliterateDiacritic()   A
last analyzed

Complexity

Conditions 1
Paths 1

Size

Total Lines 8
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 8
rs 9.4285
c 0
b 0
f 0
cc 1
eloc 5
nc 1
nop 1
1
<?php
2
/**
3
 * Webino (http://webino.sk)
4
 *
5
 * @link        https://github.com/webino/WebinoI18nSanitizeLib for the canonical source repository
6
 * @copyright   Copyright (c) 2017 Webino, s. r. o. (http://webino.sk)
7
 * @author      Peter Bačinský <[email protected]>
8
 * @license     BSD-3-Clause
9
 */
10
11
namespace WebinoI18nSanitizeLib;
12
13
use Zend\Filter;
14
15
/**
16
 * Class Transliteration
17
 *
18
 * @author Martin Hujer [email protected]
19
 */
20
class Transliteration implements Filter\FilterInterface
21
{
22
    /**
     * Returns $value transliterated to ASCII.
     *
     * The language-specific replacement tables are applied first, in a fixed
     * order. Each table works on the output of the previous one, so a
     * character listed in several tables is rewritten by the first table that
     * contains it. Whatever is still non-ASCII afterwards is handed to iconv()
     * one character at a time.
     *
     * @param string $value UTF-8 encoded text
     * @return string
     */
    public function filter($value)
    {
        // transliterate specific chars; the order of these calls is significant
        $value = $this->transliterateCzech($value);
        $value = $this->transliterateSlovak($value);
        $value = $this->transliterateRussian($value);
        $value = $this->transliterateGerman($value);
        $value = $this->transliterateFrench($value);
        $value = $this->transliterateHungarian($value);
        $value = $this->transliteratePolish($value);
        $value = $this->transliterateDanish($value);
        $value = $this->transliterateCroatian($value);
        $value = $this->transliterateDiacritic($value);

        // Split the string into single UTF-8 characters.
        // mb_split() expects an undelimited mbregex pattern, so the former
        // '~(.)~' matched the literal sequence "~x~": the string was never
        // split per character and any "~x~" sequence was silently removed.
        $characters = preg_split('//u', $value, -1, PREG_SPLIT_NO_EMPTY);
        if ($characters === false) {
            // Not valid UTF-8: let iconv() deal with the string in one piece.
            $characters = [$value];
        }

        $return = '';
        foreach ($characters as $character) {
            // NOTE: the original author left open whether //IGNORE should be
            // appended to the target charset as well.
            $converted = iconv('utf-8', 'ASCII//TRANSLIT', $character);

            if ($converted === false) {
                // iconv() could not represent this character in ASCII; drop it.
                continue;
            }

            // Some iconv implementations emit the accent as a separate ASCII
            // mark (e.g. 'e for é). Strip such marks, but only from characters
            // that were actually converted, so that quotes and carets typed by
            // the user stay in place.
            if ($character !== $converted) {
                $converted = preg_replace('~["\'^]+~', '', $converted);
            }

            $return .= $converted;
        }

        return $return;
    }
60
61
    /**
     * Replaces Cyrillic letters with Latin letter sequences.
     *
     * Besides the Russian alphabet the table also lists Є, Ґ and Ї together
     * with their lower-case forms. The hard sign (Ъ) and the soft sign (Ь) are
     * removed without a replacement.
     *
     * NOTE(review): Е/е map to JE/je while Є/є map to E/e - confirm that this
     * assignment is intended.
     *
     * @param string $s
     * @return string
     */
    private function transliterateRussian($s)
    {
        // upper-case and lower-case form of every letter side by side
        $cyrillicToLatin = [
            'А' => 'A',    'а' => 'a',
            'Б' => 'B',    'б' => 'b',
            'В' => 'V',    'в' => 'v',
            'Г' => 'G',    'г' => 'g',
            'Д' => 'D',    'д' => 'd',
            'Є' => 'E',    'є' => 'e',
            'Е' => 'JE',   'е' => 'je',
            'Ё' => 'JO',   'ё' => 'jo',
            'Ж' => 'ZH',   'ж' => 'zh',
            'З' => 'Z',    'з' => 'z',
            'И' => 'I',    'и' => 'i',
            'Й' => 'J',    'й' => 'j',
            'К' => 'K',    'к' => 'k',
            'Л' => 'L',    'л' => 'l',
            'М' => 'M',    'м' => 'm',
            'Н' => 'N',    'н' => 'n',
            'О' => 'O',    'о' => 'o',
            'П' => 'P',    'п' => 'p',
            'Р' => 'R',    'р' => 'r',
            'С' => 'S',    'с' => 's',
            'Т' => 'T',    'т' => 't',
            'У' => 'U',    'у' => 'u',
            'Ф' => 'F',    'ф' => 'f',
            'Х' => 'KH',   'х' => 'kh',
            'Ц' => 'TS',   'ц' => 'ts',
            'Ч' => 'CH',   'ч' => 'ch',
            'Ш' => 'SH',   'ш' => 'sh',
            'Щ' => 'SHCH', 'щ' => 'shch',
            'Ъ' => '',     'ъ' => '',
            'Ы' => 'Y',    'ы' => 'y',
            'Ь' => '',     'ь' => '',
            'Э' => 'E',    'э' => 'e',
            'Ю' => 'JU',   'ю' => 'ju',
            'Я' => 'JA',   'я' => 'ja',
            'Ґ' => 'G',    'ґ' => 'g',
            'Ї' => 'I',    'ї' => 'i',
        ];

        return strtr($s, $cyrillicToLatin);
    }
145
146
    /**
     * Replaces Czech accented letters with their unaccented base letters.
     *
     * @param string $s
     * @return string
     */
    private function transliterateCzech($s)
    {
        // lower-case letter first, its upper-case form next to it
        return strtr($s, [
            'á' => 'a', 'Á' => 'A',
            'č' => 'c', 'Č' => 'C',
            'ď' => 'd', 'Ď' => 'D',
            'é' => 'e', 'É' => 'E',
            'ě' => 'e', 'Ě' => 'E',
            'í' => 'i', 'Í' => 'I',
            'ň' => 'n', 'Ň' => 'N',
            'ó' => 'o', 'Ó' => 'O',
            'ř' => 'r', 'Ř' => 'R',
            'š' => 's', 'Š' => 'S',
            'ť' => 't', 'Ť' => 'T',
            'ú' => 'u', 'Ú' => 'U',
            'ů' => 'u', 'Ů' => 'U',
            'ý' => 'y', 'Ý' => 'Y',
            'ž' => 'z', 'Ž' => 'Z',
        ]);
    }
188
189
    /**
     * Replaces German umlauts, the sharp s and e/i with diaeresis.
     *
     * Umlauts are expanded to two letters (ö -> oe, ü -> ue, Ä -> Ae, ...).
     *
     * @param string $s
     * @return string
     */
    private function transliterateGerman($s)
    {
        // Lower-case 'ä' is left out on purpose: mapping it to 'ae' clashes
        // with Slovak, where ä becomes a.
        $umlauts = [
            'Ä' => 'Ae',
            'ö' => 'oe', 'Ö' => 'Oe',
            'ü' => 'ue', 'Ü' => 'Ue',
            'ß' => 'ss',
        ];

        $diaeresis = [
            'ë' => 'e', 'Ë' => 'E',
            'ï' => 'i', 'Ï' => 'I',
        ];

        return strtr($s, $umlauts + $diaeresis);
    }
212
213
    /**
     * Replaces French circumflex vowels, ligatures, the cedilla and Ÿ.
     *
     * @param string $s
     * @return string
     */
    private function transliterateFrench($s)
    {
        $french = [
            // vowels with circumflex
            'â' => 'a', 'Â' => 'A',
            'ê' => 'e', 'Ê' => 'E',
            'î' => 'i', 'Î' => 'I',
            'ô' => 'o', 'Ô' => 'O',
            'û' => 'u', 'Û' => 'U',
            // ligatures
            'œ' => 'oe',
            'æ' => 'ae',
            // remaining letters
            'Ÿ' => 'Y',
            'ç' => 'c', 'Ç' => 'C',
        ];

        return strtr($s, $french);
    }
240
241
    /**
     * Replaces Hungarian accented vowels with their unaccented base letters.
     *
     * Both lower-case and upper-case forms are covered. The table used to
     * list lower-case letters only, which left Ő and Ű to the platform
     * dependent iconv() fallback in filter().
     *
     * @param string $s
     * @return string
     */
    private function transliterateHungarian($s)
    {
        $map = [
            'á' => 'a',
            'é' => 'e',
            'í' => 'i',
            'ó' => 'o',
            'ö' => 'o',
            'ő' => 'o',
            'ú' => 'u',
            'ü' => 'u',
            'ű' => 'u',
            // upper-case counterparts of the letters above
            'Á' => 'A',
            'É' => 'E',
            'Í' => 'I',
            'Ó' => 'O',
            'Ö' => 'O',
            'Ő' => 'O',
            'Ú' => 'U',
            'Ü' => 'U',
            'Ű' => 'U',
        ];

        return strtr($s, $map);
    }
262
263
    /**
     * Replaces Polish letters with their unaccented base letters.
     *
     * Every letter is listed in lower-case and upper-case form. Ą, Ę and Ń
     * were previously missing from the table although their lower-case forms
     * were present.
     *
     * @param string $s
     * @return string
     */
    private function transliteratePolish($s)
    {
        $map = [
            'ą' => 'a',
            'ę' => 'e',
            'ó' => 'o',
            'ć' => 'c',
            'ł' => 'l',
            'ń' => 'n',
            'ś' => 's',
            'ż' => 'z',
            'ź' => 'z',
            'Ą' => 'A',
            'Ę' => 'E',
            'Ó' => 'O',
            'Ć' => 'C',
            'Ł' => 'L',
            'Ń' => 'N',
            'Ś' => 'S',
            'Ż' => 'Z',
            'Ź' => 'Z',
        ];

        return strtr($s, $map);
    }
290
291
    /**
     * Expands the Danish letters æ, ø and å to two-letter ASCII sequences.
     *
     * @param string $s
     * @return string
     */
    private function transliterateDanish($s)
    {
        return strtr($s, [
            'æ' => 'ae', 'Æ' => 'Ae',
            'ø' => 'oe', 'Ø' => 'Oe',
            'å' => 'aa', 'Å' => 'Aa',
        ]);
    }
309
310
    /**
     * Replaces Croatian letters with their unaccented base letters.
     *
     * @param string $s
     * @return string
     */
    private function transliterateCroatian($s)
    {
        // upper-case letter first, its lower-case form next to it
        $croatian = [
            'Č' => 'C', 'č' => 'c',
            'Ć' => 'C', 'ć' => 'c',
            'Ž' => 'Z', 'ž' => 'z',
            'Š' => 'S', 'š' => 's',
            'Đ' => 'D', 'đ' => 'd',
        ];

        return strtr($s, $croatian);
    }
332
333
    /**
     * Replaces Slovak accented letters with their unaccented base letters.
     *
     * Note that ä/Ä become a/A here, unlike the German two-letter expansion.
     *
     * @param string $s
     * @return string
     */
    private function transliterateSlovak($s)
    {
        // lower-case letter first, its upper-case form next to it
        return strtr($s, [
            'á' => 'a', 'Á' => 'A',
            'ä' => 'a', 'Ä' => 'A',
            'č' => 'c', 'Č' => 'C',
            'ď' => 'd', 'Ď' => 'D',
            'é' => 'e', 'É' => 'E',
            'í' => 'i', 'Í' => 'I',
            'ĺ' => 'l', 'Ĺ' => 'L',
            'ľ' => 'l', 'Ľ' => 'L',
            'ň' => 'n', 'Ň' => 'N',
            'ó' => 'o', 'Ó' => 'O',
            'ô' => 'o', 'Ô' => 'O',
            'ŕ' => 'r', 'Ŕ' => 'R',
            'š' => 's', 'Š' => 'S',
            'ť' => 't', 'Ť' => 'T',
            'ú' => 'u', 'Ú' => 'U',
            'ý' => 'y', 'Ý' => 'Y',
            'ž' => 'z', 'Ž' => 'Z',
        ]);
    }
379
380
    /**
     * Removes the stand-alone diacritic marks caron (ˇ) and acute accent (´).
     *
     * @param string $s
     * @return string
     */
    private function transliterateDiacritic($s)
    {
        // both marks are dropped without a replacement
        return strtr($s, ['ˇ' => '', '´' => '']);
    }
394
}
395