Inflector::unaccent()   B
last analyzed

Complexity

Conditions 3
Paths 3

Size

Total Lines 103
Code Lines 88

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 5
CRAP Score 7.3138

Importance

Changes 0
Metric Value
eloc 88
dl 0
loc 103
ccs 5
cts 23
cp 0.2174
rs 8.2617
c 0
b 0
f 0
cc 3
nc 3
nop 1
crap 7.3138

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method: move a cohesive part of the long method into its own, well-named method.

1
<?php
2
3
declare(strict_types=1);
4
5
namespace Doctrine\Inflector;
6
7
use RuntimeException;
8
use function chr;
9
use function function_exists;
10
use function lcfirst;
11
use function mb_strtolower;
12
use function ord;
13
use function preg_match;
14
use function preg_replace;
15
use function sprintf;
16
use function str_replace;
17
use function strlen;
18
use function strtolower;
19
use function strtr;
20
use function trim;
21
use function ucwords;
22
23
/**
 * String inflection helpers.
 *
 * Offers case conversions (tableize, classify, camelize, capitalize), accent
 * stripping (unaccent), URL slug generation (urlize) and singular/plural
 * conversion, the latter two being delegated to the injected WordInflector
 * instances.
 */
class Inflector
{
    /**
     * Replacement table used by unaccent() for UTF-8 input.
     *
     * Keys are single accented/special characters, values are their ASCII
     * replacements. Umlauts are expanded to digraphs ('Ä' => 'Ae'), ligatures
     * to their letter pairs, and '£' is removed altogether.
     */
    private const ACCENTED_CHARACTERS = [
        'À' => 'A',
        'Á' => 'A',
        'Â' => 'A',
        'Ã' => 'A',
        'Ä' => 'Ae',
        'Æ' => 'Ae',
        'Å' => 'Aa',
        // Lowercase ligature expands to a digraph like its uppercase form and like 'œ' => 'oe'.
        'æ' => 'ae',
        'Ç' => 'C',
        'È' => 'E',
        'É' => 'E',
        'Ê' => 'E',
        'Ë' => 'E',
        'Ì' => 'I',
        'Í' => 'I',
        'Î' => 'I',
        'Ï' => 'I',
        'Ñ' => 'N',
        'Ò' => 'O',
        'Ó' => 'O',
        'Ô' => 'O',
        'Õ' => 'O',
        'Ö' => 'Oe',
        'Ù' => 'U',
        'Ú' => 'U',
        'Û' => 'U',
        'Ü' => 'Ue',
        'Ý' => 'Y',
        'ß' => 'ss',
        'à' => 'a',
        'á' => 'a',
        'â' => 'a',
        'ã' => 'a',
        'ä' => 'ae',
        'å' => 'aa',
        'ç' => 'c',
        'è' => 'e',
        'é' => 'e',
        'ê' => 'e',
        'ë' => 'e',
        'ì' => 'i',
        'í' => 'i',
        'î' => 'i',
        'ï' => 'i',
        'ñ' => 'n',
        'ò' => 'o',
        'ó' => 'o',
        'ô' => 'o',
        'õ' => 'o',
        'ö' => 'oe',
        'ù' => 'u',
        'ú' => 'u',
        'û' => 'u',
        'ü' => 'ue',
        'ý' => 'y',
        'ÿ' => 'y',
        'Ā' => 'A',
        'ā' => 'a',
        'Ă' => 'A',
        'ă' => 'a',
        'Ą' => 'A',
        'ą' => 'a',
        'Ć' => 'C',
        'ć' => 'c',
        'Ĉ' => 'C',
        'ĉ' => 'c',
        'Ċ' => 'C',
        'ċ' => 'c',
        'Č' => 'C',
        'č' => 'c',
        'Ď' => 'D',
        'ď' => 'd',
        'Đ' => 'D',
        'đ' => 'd',
        'Ē' => 'E',
        'ē' => 'e',
        'Ĕ' => 'E',
        'ĕ' => 'e',
        'Ė' => 'E',
        'ė' => 'e',
        'Ę' => 'E',
        'ę' => 'e',
        'Ě' => 'E',
        'ě' => 'e',
        'Ĝ' => 'G',
        'ĝ' => 'g',
        'Ğ' => 'G',
        'ğ' => 'g',
        'Ġ' => 'G',
        'ġ' => 'g',
        'Ģ' => 'G',
        'ģ' => 'g',
        'Ĥ' => 'H',
        'ĥ' => 'h',
        'Ħ' => 'H',
        'ħ' => 'h',
        'Ĩ' => 'I',
        'ĩ' => 'i',
        'Ī' => 'I',
        'ī' => 'i',
        'Ĭ' => 'I',
        'ĭ' => 'i',
        'Į' => 'I',
        'į' => 'i',
        'İ' => 'I',
        'ı' => 'i',
        // U+0132 / U+0133: single-code-point ligatures, expanded to two ASCII letters.
        'Ĳ' => 'IJ',
        'ĳ' => 'ij',
        'Ĵ' => 'J',
        'ĵ' => 'j',
        'Ķ' => 'K',
        'ķ' => 'k',
        'ĸ' => 'k',
        'Ĺ' => 'L',
        'ĺ' => 'l',
        'Ļ' => 'L',
        'ļ' => 'l',
        'Ľ' => 'L',
        'ľ' => 'l',
        'Ŀ' => 'L',
        'ŀ' => 'l',
        'Ł' => 'L',
        'ł' => 'l',
        'Ń' => 'N',
        'ń' => 'n',
        'Ņ' => 'N',
        'ņ' => 'n',
        'Ň' => 'N',
        'ň' => 'n',
        // U+0149 is a lowercase letter; U+014A / U+014B are capital / small eng.
        'ŉ' => 'n',
        'Ŋ' => 'N',
        'ŋ' => 'n',
        'Ō' => 'O',
        'ō' => 'o',
        'Ŏ' => 'O',
        'ŏ' => 'o',
        'Ő' => 'O',
        'ő' => 'o',
        'Œ' => 'OE',
        'œ' => 'oe',
        'Ø' => 'O',
        'ø' => 'o',
        'Ŕ' => 'R',
        'ŕ' => 'r',
        'Ŗ' => 'R',
        'ŗ' => 'r',
        'Ř' => 'R',
        'ř' => 'r',
        'Ś' => 'S',
        'ś' => 's',
        'Ŝ' => 'S',
        'ŝ' => 's',
        'Ş' => 'S',
        'ş' => 's',
        'Š' => 'S',
        'š' => 's',
        'Ţ' => 'T',
        'ţ' => 't',
        'Ť' => 'T',
        'ť' => 't',
        'Ŧ' => 'T',
        'ŧ' => 't',
        'Ũ' => 'U',
        'ũ' => 'u',
        'Ū' => 'U',
        'ū' => 'u',
        'Ŭ' => 'U',
        'ŭ' => 'u',
        'Ů' => 'U',
        'ů' => 'u',
        'Ű' => 'U',
        'ű' => 'u',
        'Ų' => 'U',
        'ų' => 'u',
        'Ŵ' => 'W',
        'ŵ' => 'w',
        'Ŷ' => 'Y',
        'ŷ' => 'y',
        'Ÿ' => 'Y',
        'Ź' => 'Z',
        'ź' => 'z',
        'Ż' => 'Z',
        'ż' => 'z',
        'Ž' => 'Z',
        'ž' => 'z',
        'ſ' => 's',
        '€' => 'E',
        '£' => '',
    ];

    /**
     * Single-byte characters (input assumed to be ISO-8859-1 when it is not
     * UTF-8) that map to exactly one ASCII character. Byte N of this string is
     * replaced by byte N of SINGLE_BYTE_REPLACEMENTS, so both must stay the
     * same length (65 bytes) and in the same order.
     */
    private const SINGLE_BYTE_CHARACTERS =
          "\x80\x83\x8A\x8E\x9A\x9E\x9F\xA2\xA5\xB5"
        . "\xC0\xC1\xC2\xC3\xC4\xC5\xC7"
        . "\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF"
        . "\xD1\xD2\xD3\xD4\xD5\xD6\xD8"
        . "\xD9\xDA\xDB\xDC\xDD"
        . "\xE0\xE1\xE2\xE3\xE4\xE5\xE7"
        . "\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF"
        . "\xF1\xF2\xF3\xF4\xF5\xF6\xF8"
        . "\xF9\xFA\xFB\xFC\xFD\xFF";

    /** Positional ASCII replacements for SINGLE_BYTE_CHARACTERS. */
    private const SINGLE_BYTE_REPLACEMENTS = 'EfSZszYcYuAAAAAACEEEEIIIINOOOOOOUUUUYaaaaaaceeeeiiiinoooooouuuuyy';

    /** Single-byte characters that expand to two ASCII characters. */
    private const SINGLE_BYTE_DIGRAPHS = [
        "\x8C" => 'OE',
        "\x9C" => 'oe',
        "\xC6" => 'AE',
        "\xD0" => 'DH',
        "\xDE" => 'TH',
        "\xDF" => 'ss',
        "\xE6" => 'ae',
        "\xF0" => 'dh',
        "\xFE" => 'th',
    ];

    /** @var WordInflector */
    private $singularizer;

    /** @var WordInflector */
    private $pluralizer;

    /**
     * @param WordInflector $singularizer Used by singularize().
     * @param WordInflector $pluralizer   Used by pluralize().
     */
    public function __construct(WordInflector $singularizer, WordInflector $pluralizer)
    {
        $this->singularizer = $singularizer;
        $this->pluralizer   = $pluralizer;
    }

    /**
     * Converts a word into the format for a Doctrine table name. Converts 'ModelName' to 'model_name'.
     *
     * @throws RuntimeException When preg_replace() fails (returns null).
     */
    public function tableize(string $word) : string
    {
        // Insert an underscore before every capital letter that follows a word character.
        $tableized = preg_replace('~(?<=\\w)([A-Z])~u', '_$1', $word);

        if ($tableized === null) {
            throw new RuntimeException(sprintf(
                'preg_replace returned null for value "%s"',
                $word
            ));
        }

        return mb_strtolower($tableized);
    }

    /**
     * Converts a word into the format for a Doctrine class name. Converts 'table_name' to 'TableName'.
     */
    public function classify(string $word) : string
    {
        // Upper-case the first letter of every space/underscore/hyphen separated part, then drop the separators.
        return str_replace([' ', '_', '-'], '', ucwords($word, ' _-'));
    }

    /**
     * Camelizes a word. This uses the classify() method and turns the first character to lowercase.
     */
    public function camelize(string $word) : string
    {
        return lcfirst($this->classify($word));
    }

    /**
     * Uppercases words with configurable delimiters between words.
     *
     * Takes a string and capitalizes all of the words, like PHP's built-in
     * ucwords function. This extends that behavior, however, by allowing the
     * word delimiters to be configured, rather than only separating on
     * whitespace.
     *
     * Here is an example:
     * <code>
     * <?php
     * $string = 'top-o-the-morning to all_of_you!';
     * echo $inflector->capitalize($string);
     * // Top-O-The-Morning To All_of_you!
     *
     * echo $inflector->capitalize($string, '-_ ');
     * // Top-O-The-Morning To All_Of_You!
     * ?>
     * </code>
     *
     * @param string $string     The string to operate on.
     * @param string $delimiters A list of word separators.
     *
     * @return string The string with all delimiter-separated words capitalized.
     */
    public function capitalize(string $string, string $delimiters = " \n\t\r\0\x0B-") : string
    {
        return ucwords($string, $delimiters);
    }

    /**
     * Checks if the given string seems like it has utf8 characters in it.
     *
     * This is a structural heuristic: every lead byte must be followed by the
     * number of 10bbbbbb continuation bytes it announces. Lead bytes announcing
     * up to five continuation bytes are accepted. A string made only of bytes
     * below 0x80 (including the empty string) yields true.
     *
     * @param string $string The string to check for utf8 characters in.
     */
    public function seemsUtf8(string $string) : bool
    {
        $length = strlen($string);

        for ($i = 0; $i < $length; $i++) {
            $byte = ord($string[$i]);

            if ($byte < 0x80) {
                continue; // 0bbbbbbb
            }

            if (($byte & 0xE0) === 0xC0) {
                $n = 1; // 110bbbbb
            } elseif (($byte & 0xF0) === 0xE0) {
                $n = 2; // 1110bbbb
            } elseif (($byte & 0xF8) === 0xF0) {
                $n = 3; // 11110bbb
            } elseif (($byte & 0xFC) === 0xF8) {
                $n = 4; // 111110bb
            } elseif (($byte & 0xFE) === 0xFC) {
                $n = 5; // 1111110b
            } else {
                return false; // Does not match any model
            }

            // The lead byte must be followed by exactly $n bytes of the form 10bbbbbb.
            for ($j = 0; $j < $n; $j++) {
                if (++$i === $length || (ord($string[$i]) & 0xC0) !== 0x80) {
                    return false;
                }
            }
        }

        return true;
    }

    /**
     * Remove any illegal characters, accents, etc.
     *
     * Strings without any byte >= 0x80 are returned untouched. Otherwise the
     * string is transliterated with ACCENTED_CHARACTERS when it looks like
     * UTF-8, or with the single-byte tables when it does not.
     *
     * @param  string $string String to unaccent
     *
     * @return string Unaccented string
     */
    public function unaccent(string $string) : string
    {
        // preg_match() returns 1 on a match, 0 on no match and false on error.
        // Only a positive match means there is something to transliterate.
        if (preg_match('/[\x80-\xff]/', $string) !== 1) {
            return $string;
        }

        if ($this->seemsUtf8($string)) {
            return strtr($string, self::ACCENTED_CHARACTERS);
        }

        return $this->unaccentSingleByte($string);
    }

    /**
     * Transliterates a non-UTF-8 string, assuming ISO-8859-1 encoding.
     */
    private function unaccentSingleByte(string $string) : string
    {
        // One-to-one byte replacements first, then the bytes that expand to two letters.
        $string = strtr($string, self::SINGLE_BYTE_CHARACTERS, self::SINGLE_BYTE_REPLACEMENTS);

        return strtr($string, self::SINGLE_BYTE_DIGRAPHS);
    }

    /**
     * Convert any passed string to a url friendly string.
     * Converts 'My first blog post' to 'my-first-blog-post'
     *
     * @param  string $string String to urlize.
     *
     * @return string Urlized string.
     *
     * @throws RuntimeException When preg_replace() fails (returns null).
     */
    public function urlize(string $string) : string
    {
        // Remove all non url friendly characters with the unaccent function
        $unaccented = $this->unaccent($string);

        if (function_exists('mb_strtolower')) {
            $lowered = mb_strtolower($unaccented);
        } else {
            $lowered = strtolower($unaccented);
        }

        // Applied in order, each on the result of the previous one.
        // NOTE(review): the input is already lowercased at this point, so the two
        // patterns that require an [A-Z] character cannot match; they are kept
        // to leave the produced slugs unchanged.
        $replacements = [
            '/\W/' => ' ',
            '/([A-Z]+)([A-Z][a-z])/' => '\1_\2',
            '/([a-z\d])([A-Z])/' => '\1_\2',
            '/[^A-Z^a-z^0-9^\/]+/' => '-',
        ];

        $urlized = $lowered;

        foreach ($replacements as $pattern => $replacement) {
            $replaced = preg_replace($pattern, $replacement, $urlized);

            if ($replaced === null) {
                throw new RuntimeException(sprintf(
                    'preg_replace returned null for value "%s"',
                    $urlized
                ));
            }

            $urlized = $replaced;
        }

        return trim($urlized, '-');
    }

    /**
     * Returns a word in singular form.
     *
     * @param string $word The word in plural form.
     *
     * @return string The word in singular form.
     */
    public function singularize(string $word) : string
    {
        return $this->singularizer->inflect($word);
    }

    /**
     * Returns a word in plural form.
     *
     * @param string $word The word in singular form.
     *
     * @return string The word in plural form.
     */
    public function pluralize(string $word) : string
    {
        return $this->pluralizer->inflect($word);
    }
}
507