Failed Conditions
Pull Request — master (#116)
by Jonathan
02:38
created

Inflector::singularize()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
dl 0
loc 3
ccs 2
cts 2
cp 1
rs 10
c 0
b 0
f 0
cc 1
eloc 1
nc 1
nop 1
crap 1
1
<?php
2
3
declare(strict_types=1);
4
5
namespace Doctrine\Inflector;
6
7
use RuntimeException;
8
use function chr;
9
use function function_exists;
10
use function lcfirst;
11
use function mb_strtolower;
12
use function ord;
13
use function preg_match;
14
use function preg_replace;
15
use function sprintf;
16
use function str_replace;
17
use function strlen;
18
use function strtolower;
19
use function strtr;
20
use function trim;
21
use function ucwords;
22
23
/**
 * String inflection helper.
 *
 * Offers case/format conversions (tableize, classify, camelize, capitalize),
 * accent stripping and URL slug generation, and delegates singular/plural
 * conversion to the two WordInflector instances passed to the constructor.
 */
class Inflector
{
    /**
     * Replacement table used by unaccent() for UTF-8 input: each key is the
     * UTF-8 byte sequence of one character, each value its ASCII replacement.
     */
    private const ACCENTED_CHARACTERS = [
        // Decompositions for Latin-1 Supplement
        "\xC3\x80" => 'A',
        "\xC3\x81" => 'A',
        "\xC3\x82" => 'A',
        "\xC3\x83" => 'A',
        "\xC3\x84" => 'A',
        "\xC3\x85" => 'A',
        "\xC3\x87" => 'C',
        "\xC3\x88" => 'E',
        "\xC3\x89" => 'E',
        "\xC3\x8A" => 'E',
        "\xC3\x8B" => 'E',
        "\xC3\x8C" => 'I',
        "\xC3\x8D" => 'I',
        "\xC3\x8E" => 'I',
        "\xC3\x8F" => 'I',
        "\xC3\x91" => 'N',
        "\xC3\x92" => 'O',
        "\xC3\x93" => 'O',
        "\xC3\x94" => 'O',
        "\xC3\x95" => 'O',
        "\xC3\x96" => 'O',
        "\xC3\x99" => 'U',
        "\xC3\x9A" => 'U',
        "\xC3\x9B" => 'U',
        "\xC3\x9C" => 'U',
        "\xC3\x9D" => 'Y',
        "\xC3\x9F" => 's',
        "\xC3\xA0" => 'a',
        "\xC3\xA1" => 'a',
        "\xC3\xA2" => 'a',
        "\xC3\xA3" => 'a',
        "\xC3\xA4" => 'a',
        "\xC3\xA5" => 'a',
        "\xC3\xA7" => 'c',
        "\xC3\xA8" => 'e',
        "\xC3\xA9" => 'e',
        "\xC3\xAA" => 'e',
        "\xC3\xAB" => 'e',
        "\xC3\xAC" => 'i',
        "\xC3\xAD" => 'i',
        "\xC3\xAE" => 'i',
        "\xC3\xAF" => 'i',
        "\xC3\xB1" => 'n',
        "\xC3\xB2" => 'o',
        "\xC3\xB3" => 'o',
        "\xC3\xB4" => 'o',
        "\xC3\xB5" => 'o',
        "\xC3\xB6" => 'o',
        "\xC3\xB9" => 'u',
        "\xC3\xBA" => 'u',
        "\xC3\xBB" => 'u',
        "\xC3\xBC" => 'u',
        "\xC3\xBD" => 'y',
        "\xC3\xBF" => 'y',
        // Decompositions for Latin Extended-A
        "\xC4\x80" => 'A',
        "\xC4\x81" => 'a',
        "\xC4\x82" => 'A',
        "\xC4\x83" => 'a',
        "\xC4\x84" => 'A',
        "\xC4\x85" => 'a',
        "\xC4\x86" => 'C',
        "\xC4\x87" => 'c',
        "\xC4\x88" => 'C',
        "\xC4\x89" => 'c',
        "\xC4\x8A" => 'C',
        "\xC4\x8B" => 'c',
        "\xC4\x8C" => 'C',
        "\xC4\x8D" => 'c',
        "\xC4\x8E" => 'D',
        "\xC4\x8F" => 'd',
        "\xC4\x90" => 'D',
        "\xC4\x91" => 'd',
        "\xC4\x92" => 'E',
        "\xC4\x93" => 'e',
        "\xC4\x94" => 'E',
        "\xC4\x95" => 'e',
        "\xC4\x96" => 'E',
        "\xC4\x97" => 'e',
        "\xC4\x98" => 'E',
        "\xC4\x99" => 'e',
        "\xC4\x9A" => 'E',
        "\xC4\x9B" => 'e',
        "\xC4\x9C" => 'G',
        "\xC4\x9D" => 'g',
        "\xC4\x9E" => 'G',
        "\xC4\x9F" => 'g',
        "\xC4\xA0" => 'G',
        "\xC4\xA1" => 'g',
        "\xC4\xA2" => 'G',
        "\xC4\xA3" => 'g',
        "\xC4\xA4" => 'H',
        "\xC4\xA5" => 'h',
        "\xC4\xA6" => 'H',
        "\xC4\xA7" => 'h',
        "\xC4\xA8" => 'I',
        "\xC4\xA9" => 'i',
        "\xC4\xAA" => 'I',
        "\xC4\xAB" => 'i',
        "\xC4\xAC" => 'I',
        "\xC4\xAD" => 'i',
        "\xC4\xAE" => 'I',
        "\xC4\xAF" => 'i',
        "\xC4\xB0" => 'I',
        "\xC4\xB1" => 'i',
        "\xC4\xB2" => 'IJ',
        "\xC4\xB3" => 'ij',
        "\xC4\xB4" => 'J',
        "\xC4\xB5" => 'j',
        "\xC4\xB6" => 'K',
        "\xC4\xB7" => 'k',
        "\xC4\xB8" => 'k',
        "\xC4\xB9" => 'L',
        "\xC4\xBA" => 'l',
        "\xC4\xBB" => 'L',
        "\xC4\xBC" => 'l',
        "\xC4\xBD" => 'L',
        "\xC4\xBE" => 'l',
        "\xC4\xBF" => 'L',
        "\xC5\x80" => 'l',
        "\xC5\x81" => 'L',
        "\xC5\x82" => 'l',
        "\xC5\x83" => 'N',
        "\xC5\x84" => 'n',
        "\xC5\x85" => 'N',
        "\xC5\x86" => 'n',
        "\xC5\x87" => 'N',
        "\xC5\x88" => 'n',
        // U+0149 (small n preceded by apostrophe), U+014A (capital eng) and
        // U+014B (small eng): replacement case follows the source character.
        "\xC5\x89" => 'n',
        "\xC5\x8A" => 'N',
        "\xC5\x8B" => 'n',
        "\xC5\x8C" => 'O',
        "\xC5\x8D" => 'o',
        "\xC5\x8E" => 'O',
        "\xC5\x8F" => 'o',
        "\xC5\x90" => 'O',
        "\xC5\x91" => 'o',
        "\xC5\x92" => 'OE',
        "\xC5\x93" => 'oe',
        "\xC5\x94" => 'R',
        "\xC5\x95" => 'r',
        "\xC5\x96" => 'R',
        "\xC5\x97" => 'r',
        "\xC5\x98" => 'R',
        "\xC5\x99" => 'r',
        "\xC5\x9A" => 'S',
        "\xC5\x9B" => 's',
        "\xC5\x9C" => 'S',
        "\xC5\x9D" => 's',
        "\xC5\x9E" => 'S',
        "\xC5\x9F" => 's',
        "\xC5\xA0" => 'S',
        "\xC5\xA1" => 's',
        "\xC5\xA2" => 'T',
        "\xC5\xA3" => 't',
        "\xC5\xA4" => 'T',
        "\xC5\xA5" => 't',
        "\xC5\xA6" => 'T',
        "\xC5\xA7" => 't',
        "\xC5\xA8" => 'U',
        "\xC5\xA9" => 'u',
        "\xC5\xAA" => 'U',
        "\xC5\xAB" => 'u',
        "\xC5\xAC" => 'U',
        "\xC5\xAD" => 'u',
        "\xC5\xAE" => 'U',
        "\xC5\xAF" => 'u',
        "\xC5\xB0" => 'U',
        "\xC5\xB1" => 'u',
        "\xC5\xB2" => 'U',
        "\xC5\xB3" => 'u',
        "\xC5\xB4" => 'W',
        "\xC5\xB5" => 'w',
        "\xC5\xB6" => 'Y',
        "\xC5\xB7" => 'y',
        "\xC5\xB8" => 'Y',
        "\xC5\xB9" => 'Z',
        "\xC5\xBA" => 'z',
        "\xC5\xBB" => 'Z',
        "\xC5\xBC" => 'z',
        "\xC5\xBD" => 'Z',
        "\xC5\xBE" => 'z',
        "\xC5\xBF" => 's',
        // Euro Sign
        "\xE2\x82\xAC" => 'E',
        // GBP (Pound) Sign
        "\xC2\xA3" => '',
    ];

    /** @var WordInflector */
    private $singularizer;

    /** @var WordInflector */
    private $pluralizer;

    /**
     * @param WordInflector $singularizer Inflector that singularize() delegates to.
     * @param WordInflector $pluralizer   Inflector that pluralize() delegates to.
     */
    public function __construct(WordInflector $singularizer, WordInflector $pluralizer)
    {
        $this->singularizer = $singularizer;
        $this->pluralizer   = $pluralizer;
    }

    /**
     * Converts a word into the format for a Doctrine table name. Converts 'ModelName' to 'model_name'.
     *
     * An underscore is inserted before every ASCII uppercase letter that follows
     * a word character, then the whole string is lowercased.
     *
     * @throws RuntimeException When preg_replace() fails and returns null.
     */
    public function tableize(string $word) : string
    {
        $tableized = preg_replace('~(?<=\\w)([A-Z])~u', '_$1', $word);

        if ($tableized === null) {
            throw new RuntimeException(sprintf(
                'preg_replace returned null for value "%s"',
                $word
            ));
        }

        return mb_strtolower($tableized);
    }

    /**
     * Converts a word into the format for a Doctrine class name. Converts 'table_name' to 'TableName'.
     *
     * Spaces, underscores and hyphens act as word separators and are removed.
     */
    public function classify(string $word) : string
    {
        return str_replace([' ', '_', '-'], '', ucwords($word, ' _-'));
    }

    /**
     * Camelizes a word. This uses the classify() method and turns the first character to lowercase.
     */
    public function camelize(string $word) : string
    {
        return lcfirst($this->classify($word));
    }

    /**
     * Uppercases words with configurable delimiters between words.
     *
     * Takes a string and capitalizes all of the words, like PHP's built-in
     * ucwords function. This extends that behavior, however, by allowing the
     * word delimiters to be configured, rather than only separating on
     * whitespace.
     *
     * Here is an example:
     * <code>
     * <?php
     * $string = 'top-o-the-morning to all_of_you!';
     * echo $inflector->capitalize($string);
     * // Top-O-The-Morning To All_of_you!
     *
     * echo $inflector->capitalize($string, '-_ ');
     * // Top-O-The-Morning To All_Of_You!
     * ?>
     * </code>
     *
     * @param string $string     The string to operate on.
     * @param string $delimiters A list of word separators.
     *
     * @return string The string with all delimiter-separated words capitalized.
     */
    public function capitalize(string $string, string $delimiters = " \n\t\r\0\x0B-") : string
    {
        return ucwords($string, $delimiters);
    }

    /**
     * Checks if the given string seems like it has utf8 characters in it.
     *
     * The check is purely structural: every lead byte must be followed by the
     * number of 10bbbbbb continuation bytes that its bit pattern announces.
     * Lead bytes announcing up to five continuation bytes are accepted. An
     * empty or pure-ASCII string yields true.
     *
     * @param string $string The string to check for utf8 characters in.
     */
    public function seemsUtf8(string $string) : bool
    {
        // The length never changes inside the loop, so compute it once.
        $length = strlen($string);

        for ($i = 0; $i < $length; $i++) {
            $byte = ord($string[$i]);

            if ($byte < 0x80) {
                continue; // 0bbbbbbb
            }

            if (($byte & 0xE0) === 0xC0) {
                $n = 1; // 110bbbbb
            } elseif (($byte & 0xF0) === 0xE0) {
                $n = 2; // 1110bbbb
            } elseif (($byte & 0xF8) === 0xF0) {
                $n = 3; // 11110bbb
            } elseif (($byte & 0xFC) === 0xF8) {
                $n = 4; // 111110bb
            } elseif (($byte & 0xFE) === 0xFC) {
                $n = 5; // 1111110b
            } else {
                return false; // Does not match any model
            }

            // n bytes matching 10bbbbbb must follow; running off the end of
            // the string means the sequence is truncated.
            for ($j = 0; $j < $n; $j++) {
                if (++$i === $length || (ord($string[$i]) & 0xC0) !== 0x80) {
                    return false;
                }
            }
        }

        return true;
    }

    /**
     * Remove any illegal characters, accents, etc.
     *
     * Strings that look like UTF-8 are translated through ACCENTED_CHARACTERS;
     * anything else is treated as a single-byte (ISO-8859-1) string.
     *
     * @param  string $string String to unaccent
     *
     * @return string Unaccented string
     */
    public function unaccent(string $string) : string
    {
        // preg_match() returns 0 when nothing matches and false only on error,
        // so compare against 1: a string without any high byte (or one PCRE
        // could not examine) has nothing to replace and is returned untouched.
        if (preg_match('/[\x80-\xff]/', $string) !== 1) {
            return $string;
        }

        if ($this->seemsUtf8($string)) {
            return strtr($string, self::ACCENTED_CHARACTERS);
        }

        // Assume ISO-8859-1 if not UTF-8
        return $this->unaccentSingleByte($string);
    }

    /**
     * Strips accents from a string that is not UTF-8, treating every byte as
     * one character.
     */
    private function unaccentSingleByte(string $string) : string
    {
        // Bytes replaced by exactly one ASCII character; the n-th byte of
        // $singleIn is replaced by the n-th character of $singleOut.
        $singleIn =
              chr(128) . chr(131) . chr(138) . chr(142) . chr(154)
            . chr(158) . chr(159) . chr(162) . chr(165) . chr(181)
            . chr(192) . chr(193) . chr(194) . chr(195) . chr(196) . chr(197) . chr(199)
            . chr(200) . chr(201) . chr(202) . chr(203) . chr(204) . chr(205) . chr(206) . chr(207)
            . chr(209) . chr(210) . chr(211) . chr(212) . chr(213) . chr(214) . chr(216)
            . chr(217) . chr(218) . chr(219) . chr(220) . chr(221)
            . chr(224) . chr(225) . chr(226) . chr(227) . chr(228) . chr(229) . chr(231)
            . chr(232) . chr(233) . chr(234) . chr(235) . chr(236) . chr(237) . chr(238) . chr(239)
            . chr(241) . chr(242) . chr(243) . chr(244) . chr(245) . chr(246) . chr(248)
            . chr(249) . chr(250) . chr(251) . chr(252) . chr(253) . chr(255);

        $singleOut = 'EfSZszYcYuAAAAAACEEEEIIIINOOOOOOUUUUYaaaaaaceeeeiiiinoooooouuuuyy';

        $string = strtr($string, $singleIn, $singleOut);

        // Bytes that expand to two ASCII characters.
        $doubleIn = [
            chr(140),
            chr(156),
            chr(198),
            chr(208),
            chr(222),
            chr(223),
            chr(230),
            chr(240),
            chr(254),
        ];

        $doubleOut = ['OE', 'oe', 'AE', 'DH', 'TH', 'ss', 'ae', 'dh', 'th'];

        return str_replace($doubleIn, $doubleOut, $string);
    }

    /**
     * Convert any passed string to a url friendly string.
     * Converts 'My first blog post' to 'my-first-blog-post'
     *
     * @param  string $string String to urlize.
     *
     * @return string Urlized string.
     *
     * @throws RuntimeException When preg_replace() fails and returns null.
     */
    public function urlize(string $string) : string
    {
        // Remove all non url friendly characters with the unaccent function
        $unaccented = $this->unaccent($string);

        if (function_exists('mb_strtolower')) {
            $lowered = mb_strtolower($unaccented);
        } else {
            $lowered = strtolower($unaccented);
        }

        // Applied in order. NOTE(review): the input is already lowercased, so
        // the two patterns that require [A-Z] cannot match here; they are kept
        // unchanged so the replacement list stays exactly as it was.
        $replacements = [
            '/\W/' => ' ',
            '/([A-Z]+)([A-Z][a-z])/' => '\1_\2',
            '/([a-z\d])([A-Z])/' => '\1_\2',
            '/[^A-Z^a-z^0-9^\/]+/' => '-',
        ];

        $urlized = $lowered;

        foreach ($replacements as $pattern => $replacement) {
            $replaced = preg_replace($pattern, $replacement, $urlized);

            if ($replaced === null) {
                throw new RuntimeException(sprintf(
                    'preg_replace returned null for value "%s"',
                    $urlized
                ));
            }

            $urlized = $replaced;
        }

        // Separators at either end would show up as leading/trailing dashes.
        return trim($urlized, '-');
    }

    /**
     * Returns a word in singular form.
     *
     * Delegates to the singularizer supplied to the constructor.
     *
     * @param string $word The word in plural form.
     *
     * @return string The word in singular form.
     */
    public function singularize(string $word) : string
    {
        return $this->singularizer->inflect($word);
    }

    /**
     * Returns a word in plural form.
     *
     * Delegates to the pluralizer supplied to the constructor.
     *
     * @param string $word The word in singular form.
     *
     * @return string The word in plural form.
     */
    public function pluralize(string $word) : string
    {
        return $this->pluralizer->inflect($word);
    }
}
505