Failed Conditions
Push — master ( 361b0b...606d37 )
by Jonathan
37s
created

Inflector::seemsUtf8()   B

Complexity

Conditions 11
Paths 13

Size

Total Lines 27
Code Lines 19

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 9
CRAP Score 28.6394

Importance

Changes 0
Metric Value
eloc 19
dl 0
loc 27
ccs 9
cts 19
cp 0.4737
rs 7.3166
c 0
b 0
f 0
cc 11
nc 13
nop 1
crap 28.6394

How to fix   Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

- Extract Method

1
<?php
2
3
declare(strict_types=1);
4
5
namespace Doctrine\Inflector;
6
7
use function chr;
8
use function function_exists;
9
use function lcfirst;
10
use function mb_strtolower;
11
use function ord;
12
use function preg_match;
13
use function preg_replace;
14
use function str_replace;
15
use function strlen;
16
use function strtolower;
17
use function strtr;
18
use function trim;
19
use function ucwords;
20
21
class Inflector
22
{
23
    /** @var WordInflector */
24
    private $singularizer;
25
26
    /** @var WordInflector */
27
    private $pluralizer;
28
29 1090
    /**
     * Stores the two word inflectors that singularize() and pluralize() delegate to.
     *
     * @param WordInflector $singularizer Inflector invoked by singularize().
     * @param WordInflector $pluralizer   Inflector invoked by pluralize().
     */
    public function __construct(WordInflector $singularizer, WordInflector $pluralizer)
    {
        $this->pluralizer   = $pluralizer;
        $this->singularizer = $singularizer;
    }
34
35
    /**
     * Turns a class-style name into a table-style name, e.g. 'ModelName' becomes 'model_name'.
     *
     * An underscore is inserted in front of every ASCII capital letter that directly
     * follows a word character; the result is then lower-cased with mb_strtolower().
     *
     * @param string $word The word to convert.
     *
     * @return string The underscored, lower-cased word.
     */
    public function tableize(string $word) : string
    {
        $underscored = preg_replace('~(?<=\\w)([A-Z])~u', '_$1', $word);

        return mb_strtolower($underscored);
    }
42
43
    /**
     * Turns a table-style name into a class-style name, e.g. 'table_name' becomes 'TableName'.
     *
     * Words are recognised by the separators space, underscore and hyphen: the first
     * letter of each word is upper-cased and the separators are then removed.
     *
     * @param string $word The word to convert.
     *
     * @return string The word with capitalized parts and no separators.
     */
    public function classify(string $word) : string
    {
        $capitalized = ucwords($word, ' _-');

        return str_replace([' ', '_', '-'], '', $capitalized);
    }
50
51
    /**
     * Camel-cases a word: the result of classify() with its first character lower-cased,
     * e.g. 'table_name' becomes 'tableName'.
     *
     * @param string $word The word to convert.
     *
     * @return string The camel-cased word.
     */
    public function camelize(string $word) : string
    {
        $classified = $this->classify($word);

        return lcfirst($classified);
    }
58
59
    /**
     * Upper-cases the first letter of every word, with a configurable set of word delimiters.
     *
     * This is a thin wrapper around PHP's ucwords() that passes the delimiter list
     * through. The default list contains the whitespace characters space, newline,
     * tab, carriage return, NUL and vertical tab, plus the hyphen.
     *
     * Example:
     * <code>
     * <?php
     * $string = 'top-o-the-morning to all_of_you!';
     * echo $inflector->capitalize($string);
     * // Top-O-The-Morning To All_of_you!
     *
     * echo $inflector->capitalize($string, '-_ ');
     * // Top-O-The-Morning To All_Of_You!
     * ?>
     * </code>
     *
     * @param string $string     The string to operate on.
     * @param string $delimiters The characters that separate words.
     *
     * @return string The string with every delimiter-separated word capitalized.
     */
    public function capitalize(string $string, string $delimiters = " \n\t\r\0\x0B-") : string
    {
        $capitalized = ucwords($string, $delimiters);

        return $capitalized;
    }
88
89
    /**
     * Checks whether the given string is structurally plausible UTF-8.
     *
     * Every byte is either plain ASCII or a lead byte followed by the number of
     * continuation bytes (10bbbbbb) that the lead byte announces. This is a shape
     * check only: the obsolete 5- and 6-byte forms are accepted, and the decoded
     * code points are not validated. An empty or pure-ASCII string yields true.
     *
     * @param string $string The string to check for utf8 characters in.
     *
     * @return bool True when every byte sequence has a valid lead/continuation shape.
     */
    public function seemsUtf8(string $string) : bool
    {
        // The length never changes inside the loops, so compute it once.
        $length = strlen($string);

        for ($i = 0; $i < $length; $i++) {
            $byte = ord($string[$i]);

            if ($byte < 0x80) {
                continue; // 0bbbbbbb
            }

            if (($byte & 0xE0) === 0xC0) {
                $n = 1; // 110bbbbb
            } elseif (($byte & 0xF0) === 0xE0) {
                $n = 2; // 1110bbbb
            } elseif (($byte & 0xF8) === 0xF0) {
                $n = 3; // 11110bbb
            } elseif (($byte & 0xFC) === 0xF8) {
                $n = 4; // 111110bb
            } elseif (($byte & 0xFE) === 0xFC) {
                $n = 5; // 1111110b
            } else {
                return false; // Does not match any model
            }

            // The next $n bytes must exist and must all look like 10bbbbbb.
            for ($j = 0; $j < $n; $j++) {
                if (++$i === $length || (ord($string[$i]) & 0xC0) !== 0x80) {
                    return false;
                }
            }
        }

        return true;
    }
122
123
    /**
     * Replaces accented and other non-ASCII characters with ASCII approximations.
     *
     * Strings without any byte in the range 0x80-0xFF are returned unchanged.
     * Otherwise the input is treated as UTF-8 when seemsUtf8() accepts it, and as a
     * single-byte encoding (assumed to be ISO-8859-1) when it does not. Only the
     * characters listed in the tables below are replaced; all other bytes are left
     * as they are.
     *
     * @param  string $string String to unaccent
     *
     * @return string Unaccented string
     */
    public function unaccent(string $string) : string
    {
        // preg_match() returns 1 on a match, 0 on no match and false on failure;
        // anything other than a match means there is nothing to transliterate.
        if (preg_match('/[\x80-\xff]/', $string) !== 1) {
            return $string;
        }

        if ($this->seemsUtf8($string)) {
            $characters = [
                // Decompositions for Latin-1 Supplement
                chr(195) . chr(128) => 'A',
                chr(195) . chr(129) => 'A',
                chr(195) . chr(130) => 'A',
                chr(195) . chr(131) => 'A',
                chr(195) . chr(132) => 'A',
                chr(195) . chr(133) => 'A',
                chr(195) . chr(135) => 'C',
                chr(195) . chr(136) => 'E',
                chr(195) . chr(137) => 'E',
                chr(195) . chr(138) => 'E',
                chr(195) . chr(139) => 'E',
                chr(195) . chr(140) => 'I',
                chr(195) . chr(141) => 'I',
                chr(195) . chr(142) => 'I',
                chr(195) . chr(143) => 'I',
                chr(195) . chr(145) => 'N',
                chr(195) . chr(146) => 'O',
                chr(195) . chr(147) => 'O',
                chr(195) . chr(148) => 'O',
                chr(195) . chr(149) => 'O',
                chr(195) . chr(150) => 'O',
                chr(195) . chr(153) => 'U',
                chr(195) . chr(154) => 'U',
                chr(195) . chr(155) => 'U',
                chr(195) . chr(156) => 'U',
                chr(195) . chr(157) => 'Y',
                chr(195) . chr(159) => 's',
                chr(195) . chr(160) => 'a',
                chr(195) . chr(161) => 'a',
                chr(195) . chr(162) => 'a',
                chr(195) . chr(163) => 'a',
                chr(195) . chr(164) => 'a',
                chr(195) . chr(165) => 'a',
                chr(195) . chr(167) => 'c',
                chr(195) . chr(168) => 'e',
                chr(195) . chr(169) => 'e',
                chr(195) . chr(170) => 'e',
                chr(195) . chr(171) => 'e',
                chr(195) . chr(172) => 'i',
                chr(195) . chr(173) => 'i',
                chr(195) . chr(174) => 'i',
                chr(195) . chr(175) => 'i',
                chr(195) . chr(177) => 'n',
                chr(195) . chr(178) => 'o',
                chr(195) . chr(179) => 'o',
                chr(195) . chr(180) => 'o',
                chr(195) . chr(181) => 'o',
                chr(195) . chr(182) => 'o',
                chr(195) . chr(185) => 'u',
                chr(195) . chr(186) => 'u',
                chr(195) . chr(187) => 'u',
                chr(195) . chr(188) => 'u',
                chr(195) . chr(189) => 'y',
                chr(195) . chr(191) => 'y',
                // Decompositions for Latin Extended-A
                chr(196) . chr(128) => 'A',
                chr(196) . chr(129) => 'a',
                chr(196) . chr(130) => 'A',
                chr(196) . chr(131) => 'a',
                chr(196) . chr(132) => 'A',
                chr(196) . chr(133) => 'a',
                chr(196) . chr(134) => 'C',
                chr(196) . chr(135) => 'c',
                chr(196) . chr(136) => 'C',
                chr(196) . chr(137) => 'c',
                chr(196) . chr(138) => 'C',
                chr(196) . chr(139) => 'c',
                chr(196) . chr(140) => 'C',
                chr(196) . chr(141) => 'c',
                chr(196) . chr(142) => 'D',
                chr(196) . chr(143) => 'd',
                chr(196) . chr(144) => 'D',
                chr(196) . chr(145) => 'd',
                chr(196) . chr(146) => 'E',
                chr(196) . chr(147) => 'e',
                chr(196) . chr(148) => 'E',
                chr(196) . chr(149) => 'e',
                chr(196) . chr(150) => 'E',
                chr(196) . chr(151) => 'e',
                chr(196) . chr(152) => 'E',
                chr(196) . chr(153) => 'e',
                chr(196) . chr(154) => 'E',
                chr(196) . chr(155) => 'e',
                chr(196) . chr(156) => 'G',
                chr(196) . chr(157) => 'g',
                chr(196) . chr(158) => 'G',
                chr(196) . chr(159) => 'g',
                chr(196) . chr(160) => 'G',
                chr(196) . chr(161) => 'g',
                chr(196) . chr(162) => 'G',
                chr(196) . chr(163) => 'g',
                chr(196) . chr(164) => 'H',
                chr(196) . chr(165) => 'h',
                chr(196) . chr(166) => 'H',
                chr(196) . chr(167) => 'h',
                chr(196) . chr(168) => 'I',
                chr(196) . chr(169) => 'i',
                chr(196) . chr(170) => 'I',
                chr(196) . chr(171) => 'i',
                chr(196) . chr(172) => 'I',
                chr(196) . chr(173) => 'i',
                chr(196) . chr(174) => 'I',
                chr(196) . chr(175) => 'i',
                chr(196) . chr(176) => 'I',
                chr(196) . chr(177) => 'i',
                chr(196) . chr(178) => 'IJ',
                chr(196) . chr(179) => 'ij',
                chr(196) . chr(180) => 'J',
                chr(196) . chr(181) => 'j',
                chr(196) . chr(182) => 'K',
                chr(196) . chr(183) => 'k',
                chr(196) . chr(184) => 'k',
                chr(196) . chr(185) => 'L',
                chr(196) . chr(186) => 'l',
                chr(196) . chr(187) => 'L',
                chr(196) . chr(188) => 'l',
                chr(196) . chr(189) => 'L',
                chr(196) . chr(190) => 'l',
                chr(196) . chr(191) => 'L',
                chr(197) . chr(128) => 'l',
                chr(197) . chr(129) => 'L',
                chr(197) . chr(130) => 'l',
                chr(197) . chr(131) => 'N',
                chr(197) . chr(132) => 'n',
                chr(197) . chr(133) => 'N',
                chr(197) . chr(134) => 'n',
                chr(197) . chr(135) => 'N',
                chr(197) . chr(136) => 'n',
                chr(197) . chr(137) => 'N',
                chr(197) . chr(138) => 'n',
                chr(197) . chr(139) => 'N',
                chr(197) . chr(140) => 'O',
                chr(197) . chr(141) => 'o',
                chr(197) . chr(142) => 'O',
                chr(197) . chr(143) => 'o',
                chr(197) . chr(144) => 'O',
                chr(197) . chr(145) => 'o',
                chr(197) . chr(146) => 'OE',
                chr(197) . chr(147) => 'oe',
                chr(197) . chr(148) => 'R',
                chr(197) . chr(149) => 'r',
                chr(197) . chr(150) => 'R',
                chr(197) . chr(151) => 'r',
                chr(197) . chr(152) => 'R',
                chr(197) . chr(153) => 'r',
                chr(197) . chr(154) => 'S',
                chr(197) . chr(155) => 's',
                chr(197) . chr(156) => 'S',
                chr(197) . chr(157) => 's',
                chr(197) . chr(158) => 'S',
                chr(197) . chr(159) => 's',
                chr(197) . chr(160) => 'S',
                chr(197) . chr(161) => 's',
                chr(197) . chr(162) => 'T',
                chr(197) . chr(163) => 't',
                chr(197) . chr(164) => 'T',
                chr(197) . chr(165) => 't',
                chr(197) . chr(166) => 'T',
                chr(197) . chr(167) => 't',
                chr(197) . chr(168) => 'U',
                chr(197) . chr(169) => 'u',
                chr(197) . chr(170) => 'U',
                chr(197) . chr(171) => 'u',
                chr(197) . chr(172) => 'U',
                chr(197) . chr(173) => 'u',
                chr(197) . chr(174) => 'U',
                chr(197) . chr(175) => 'u',
                chr(197) . chr(176) => 'U',
                chr(197) . chr(177) => 'u',
                chr(197) . chr(178) => 'U',
                chr(197) . chr(179) => 'u',
                chr(197) . chr(180) => 'W',
                chr(197) . chr(181) => 'w',
                chr(197) . chr(182) => 'Y',
                chr(197) . chr(183) => 'y',
                chr(197) . chr(184) => 'Y',
                chr(197) . chr(185) => 'Z',
                chr(197) . chr(186) => 'z',
                chr(197) . chr(187) => 'Z',
                chr(197) . chr(188) => 'z',
                chr(197) . chr(189) => 'Z',
                chr(197) . chr(190) => 'z',
                chr(197) . chr(191) => 's',
                // Euro Sign
                chr(226) . chr(130) . chr(172) => 'E',
                // GBP (Pound) Sign
                chr(194) . chr(163) => '',
                // German characters. When this file is saved as UTF-8, several of the
                // literal keys below repeat byte sequences already listed above (for
                // example 'Ä' is chr(195) . chr(132)); the later entry wins, so these
                // multi-letter forms are the ones that take effect.
                'Ä' => 'Ae',
                'ä' => 'ae',
                'Ü' => 'Ue',
                'ü' => 'ue',
                'Ö' => 'Oe',
                'ö' => 'oe',
                'ß' => 'ss',
                // Norwegian characters
                'Å' => 'Aa',
                'Æ' => 'Ae',
                'Ø' => 'O',
                'æ' => 'a',
                'ø' => 'o',
                'å' => 'aa',
            ];

            $string = strtr($string, $characters);
        } else {
            // Assume ISO-8859-1 if not UTF-8.
            // Byte N of $singleIn is replaced by character N of $singleOut (65 each).
            $singleIn = chr(128) . chr(131) . chr(138) . chr(142) . chr(154) . chr(158) . chr(159) . chr(162)
                . chr(165) . chr(181) . chr(192) . chr(193) . chr(194) . chr(195) . chr(196) . chr(197)
                . chr(199) . chr(200) . chr(201) . chr(202) . chr(203) . chr(204) . chr(205) . chr(206)
                . chr(207) . chr(209) . chr(210) . chr(211) . chr(212) . chr(213) . chr(214) . chr(216)
                . chr(217) . chr(218) . chr(219) . chr(220) . chr(221) . chr(224) . chr(225) . chr(226)
                . chr(227) . chr(228) . chr(229) . chr(231) . chr(232) . chr(233) . chr(234) . chr(235)
                . chr(236) . chr(237) . chr(238) . chr(239) . chr(241) . chr(242) . chr(243) . chr(244)
                . chr(245) . chr(246) . chr(248) . chr(249) . chr(250) . chr(251) . chr(252) . chr(253)
                . chr(255);

            $singleOut = 'EfSZszYcYuAAAAAACEEEEIIIINOOOOOOUUUUYaaaaaaceeeeiiiinoooooouuuuyy';

            $string = strtr($string, $singleIn, $singleOut);

            // Bytes that expand to two ASCII letters cannot go through the
            // byte-for-byte strtr() above, so they are replaced separately.
            $doubleIn = [
                chr(140),
                chr(156),
                chr(198),
                chr(208),
                chr(222),
                chr(223),
                chr(230),
                chr(240),
                chr(254),
            ];

            $doubleOut = ['OE', 'oe', 'AE', 'DH', 'TH', 'ss', 'ae', 'dh', 'th'];

            $string = str_replace($doubleIn, $doubleOut, $string);
        }

        return $string;
    }
440
441
    /**
     * Converts any passed string to a url friendly string.
     * Converts 'My first blog post' to 'my-first-blog-post'
     *
     * The string is transliterated with unaccent(), lower-cased, and every run of
     * characters other than ASCII letters and digits is collapsed into a single
     * dash; leading and trailing dashes are trimmed.
     *
     * @param  string $string String to urlize.
     *
     * @return string Urlized string.
     */
    public function urlize(string $string) : string
    {
        // Transliterate accented characters to ASCII first.
        $slug = $this->unaccent($string);

        $slug = function_exists('mb_strtolower') ? mb_strtolower($slug) : strtolower($slug);

        // Turn every non-word character into a space.
        $slug = preg_replace('/\W/', ' ', $slug);

        // NOTE(review): the input has already been lower-cased and stripped of
        // non-word characters here, so the next three rewrites ('::' to '/', and
        // the two camel-case splits) look like they can no longer match; confirm
        // before removing them.
        $slug = preg_replace('/::/', '/', $slug);
        $slug = preg_replace('/([A-Z]+)([A-Z][a-z])/', '\1_\2', $slug);
        $slug = preg_replace('/([a-z\d])([A-Z])/', '\1_\2', $slug);

        // Collapse each run of remaining separators (spaces, underscores, ...) into
        // one dash. Inside the character class only the first '^' negates; the
        // others are literal, so '^' and '/' are among the characters kept.
        $slug = preg_replace('/[^A-Z^a-z^0-9^\/]+/', '-', $slug);

        return trim(strtolower($slug), '-');
    }
480
481
    /**
     * Returns the singular form of a word, as produced by the singularizer
     * that was passed to the constructor.
     *
     * @param string $word The word in plural form.
     *
     * @return string The word in singular form.
     */
    public function singularize(string $word) : string
    {
        $singular = $this->singularizer->inflect($word);

        return $singular;
    }
492
493
    /**
     * Returns the plural form of a word, as produced by the pluralizer
     * that was passed to the constructor.
     *
     * @param string $word The word in singular form.
     *
     * @return string The word in plural form.
     */
    public function pluralize(string $word) : string
    {
        $plural = $this->pluralizer->inflect($word);

        return $plural;
    }
504
}
505