1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
declare(strict_types=1); |
4
|
|
|
|
5
|
|
|
namespace Doctrine\Inflector; |
6
|
|
|
|
7
|
|
|
use RuntimeException; |
8
|
|
|
use function chr; |
9
|
|
|
use function function_exists; |
10
|
|
|
use function lcfirst; |
11
|
|
|
use function mb_strtolower; |
12
|
|
|
use function ord; |
13
|
|
|
use function preg_match; |
14
|
|
|
use function preg_replace; |
15
|
|
|
use function sprintf; |
16
|
|
|
use function str_replace; |
17
|
|
|
use function strlen; |
18
|
|
|
use function strtolower; |
19
|
|
|
use function strtr; |
20
|
|
|
use function trim; |
21
|
|
|
use function ucwords; |
22
|
|
|
|
23
|
|
|
/**
 * String inflection helper.
 *
 * Offers case/format conversions (tableize, classify, camelize, capitalize),
 * accent stripping and URL slug generation, and delegates singular/plural
 * inflection to the two WordInflector instances supplied to the constructor.
 */
class Inflector
{
    /**
     * Transliteration table used by unaccent() for UTF-8 input.
     *
     * Keys are accented or otherwise non-ASCII characters, values are their
     * ASCII replacements (an empty string removes the character).
     */
    private const ACCENTED_CHARACTERS = [
        'À' => 'A',
        'Á' => 'A',
        'Â' => 'A',
        'Ã' => 'A',
        'Ä' => 'Ae',
        'Æ' => 'Ae',
        'Å' => 'Aa',
        'æ' => 'a',
        'Ç' => 'C',
        'È' => 'E',
        'É' => 'E',
        'Ê' => 'E',
        'Ë' => 'E',
        'Ì' => 'I',
        'Í' => 'I',
        'Î' => 'I',
        'Ï' => 'I',
        'Ñ' => 'N',
        'Ò' => 'O',
        'Ó' => 'O',
        'Ô' => 'O',
        'Õ' => 'O',
        'Ö' => 'Oe',
        'Ù' => 'U',
        'Ú' => 'U',
        'Û' => 'U',
        'Ü' => 'Ue',
        'Ý' => 'Y',
        'ß' => 'ss',
        'à' => 'a',
        'á' => 'a',
        'â' => 'a',
        'ã' => 'a',
        'ä' => 'ae',
        'å' => 'aa',
        'ç' => 'c',
        'è' => 'e',
        'é' => 'e',
        'ê' => 'e',
        'ë' => 'e',
        'ì' => 'i',
        'í' => 'i',
        'î' => 'i',
        'ï' => 'i',
        'ñ' => 'n',
        'ò' => 'o',
        'ó' => 'o',
        'ô' => 'o',
        'õ' => 'o',
        'ö' => 'oe',
        'ù' => 'u',
        'ú' => 'u',
        'û' => 'u',
        'ü' => 'ue',
        'ý' => 'y',
        'ÿ' => 'y',
        'Ā' => 'A',
        'ā' => 'a',
        'Ă' => 'A',
        'ă' => 'a',
        'Ą' => 'A',
        'ą' => 'a',
        'Ć' => 'C',
        'ć' => 'c',
        'Ĉ' => 'C',
        'ĉ' => 'c',
        'Ċ' => 'C',
        'ċ' => 'c',
        'Č' => 'C',
        'č' => 'c',
        'Ď' => 'D',
        'ď' => 'd',
        'Đ' => 'D',
        'đ' => 'd',
        'Ē' => 'E',
        'ē' => 'e',
        'Ĕ' => 'E',
        'ĕ' => 'e',
        'Ė' => 'E',
        'ė' => 'e',
        'Ę' => 'E',
        'ę' => 'e',
        'Ě' => 'E',
        'ě' => 'e',
        'Ĝ' => 'G',
        'ĝ' => 'g',
        'Ğ' => 'G',
        'ğ' => 'g',
        'Ġ' => 'G',
        'ġ' => 'g',
        'Ģ' => 'G',
        'ģ' => 'g',
        'Ĥ' => 'H',
        'ĥ' => 'h',
        'Ħ' => 'H',
        'ħ' => 'h',
        'Ĩ' => 'I',
        'ĩ' => 'i',
        'Ī' => 'I',
        'ī' => 'i',
        'Ĭ' => 'I',
        'ĭ' => 'i',
        'Į' => 'I',
        'į' => 'i',
        'İ' => 'I',
        'ı' => 'i',
        // The IJ ligatures are written as code point escapes so the keys stay
        // single characters even if the file is re-encoded or normalized.
        "\u{0132}" => 'IJ',
        "\u{0133}" => 'ij',
        'Ĵ' => 'J',
        'ĵ' => 'j',
        'Ķ' => 'K',
        'ķ' => 'k',
        'ĸ' => 'k',
        'Ĺ' => 'L',
        'ĺ' => 'l',
        'Ļ' => 'L',
        'ļ' => 'l',
        'Ľ' => 'L',
        'ľ' => 'l',
        'Ŀ' => 'L',
        'ŀ' => 'l',
        'Ł' => 'L',
        'ł' => 'l',
        'Ń' => 'N',
        'ń' => 'n',
        'Ņ' => 'N',
        'ņ' => 'n',
        'Ň' => 'N',
        'ň' => 'n',
        // NOTE(review): the letter case of the next three replacements looks
        // inverted; kept unchanged so existing output does not shift.
        "\u{0149}" => 'N',
        'Ŋ' => 'n',
        'ŋ' => 'N',
        'Ō' => 'O',
        'ō' => 'o',
        'Ŏ' => 'O',
        'ŏ' => 'o',
        'Ő' => 'O',
        'ő' => 'o',
        'Œ' => 'OE',
        'œ' => 'oe',
        'Ø' => 'O',
        'ø' => 'o',
        'Ŕ' => 'R',
        'ŕ' => 'r',
        'Ŗ' => 'R',
        'ŗ' => 'r',
        'Ř' => 'R',
        'ř' => 'r',
        'Ś' => 'S',
        'ś' => 's',
        'Ŝ' => 'S',
        'ŝ' => 's',
        'Ş' => 'S',
        'ş' => 's',
        'Š' => 'S',
        'š' => 's',
        'Ţ' => 'T',
        'ţ' => 't',
        'Ť' => 'T',
        'ť' => 't',
        'Ŧ' => 'T',
        'ŧ' => 't',
        'Ũ' => 'U',
        'ũ' => 'u',
        'Ū' => 'U',
        'ū' => 'u',
        'Ŭ' => 'U',
        'ŭ' => 'u',
        'Ů' => 'U',
        'ů' => 'u',
        'Ű' => 'U',
        'ű' => 'u',
        'Ų' => 'U',
        'ų' => 'u',
        'Ŵ' => 'W',
        'ŵ' => 'w',
        'Ŷ' => 'Y',
        'ŷ' => 'y',
        'Ÿ' => 'Y',
        'Ź' => 'Z',
        'ź' => 'z',
        'Ż' => 'Z',
        'ż' => 'z',
        'Ž' => 'Z',
        'ž' => 'z',
        'ſ' => 's',
        '€' => 'E',
        '£' => '',
    ];

    /** @var WordInflector */
    private $singularizer;

    /** @var WordInflector */
    private $pluralizer;

    /**
     * @param WordInflector $singularizer Used by singularize().
     * @param WordInflector $pluralizer   Used by pluralize().
     */
    public function __construct(WordInflector $singularizer, WordInflector $pluralizer)
    {
        $this->singularizer = $singularizer;
        $this->pluralizer   = $pluralizer;
    }

    /**
     * Converts a word into the format for a Doctrine table name. Converts 'ModelName' to 'model_name'.
     *
     * @throws RuntimeException When preg_replace() fails and returns null.
     */
    public function tableize(string $word) : string
    {
        // Put an underscore in front of every ASCII capital that follows a word character.
        $tableized = preg_replace('~(?<=\\w)([A-Z])~u', '_$1', $word);

        if ($tableized === null) {
            throw new RuntimeException(sprintf(
                'preg_replace returned null for value "%s"',
                $word
            ));
        }

        return mb_strtolower($tableized);
    }

    /**
     * Converts a word into the format for a Doctrine class name. Converts 'table_name' to 'TableName'.
     */
    public function classify(string $word) : string
    {
        // Capitalize after space, underscore and hyphen, then drop those separators.
        return str_replace([' ', '_', '-'], '', ucwords($word, ' _-'));
    }

    /**
     * Camelizes a word. This uses the classify() method and turns the first character to lowercase.
     */
    public function camelize(string $word) : string
    {
        return lcfirst($this->classify($word));
    }

    /**
     * Uppercases words with configurable delimiters between words.
     *
     * Takes a string and capitalizes all of the words, like PHP's built-in
     * ucwords function. This extends that behavior, however, by allowing the
     * word delimiters to be configured, rather than only separating on
     * whitespace.
     *
     * Here is an example:
     * <code>
     * <?php
     * $string = 'top-o-the-morning to all_of_you!';
     * echo $inflector->capitalize($string);
     * // Top-O-The-Morning To All_of_you!
     *
     * echo $inflector->capitalize($string, '-_ ');
     * // Top-O-The-Morning To All_Of_You!
     * ?>
     * </code>
     *
     * @param string $string     The string to operate on.
     * @param string $delimiters A list of word separators.
     *
     * @return string The string with all delimiter-separated words capitalized.
     */
    public function capitalize(string $string, string $delimiters = " \n\t\r\0\x0B-") : string
    {
        return ucwords($string, $delimiters);
    }

    /**
     * Checks if the given string seems like it has utf8 characters in it.
     *
     * Walks the bytes and verifies that every lead byte is followed by the
     * number of 10bbbbbb continuation bytes its bit pattern announces. Lead
     * bytes announcing one to five continuation bytes are accepted. An empty
     * string yields true.
     *
     * @param string $string The string to check for utf8 characters in.
     */
    public function seemsUtf8(string $string) : bool
    {
        $length = strlen($string);

        for ($i = 0; $i < $length; $i++) {
            $byte = ord($string[$i]);

            if ($byte < 0x80) {
                continue; // 0bbbbbbb
            }

            if (($byte & 0xE0) === 0xC0) {
                $n = 1; // 110bbbbb
            } elseif (($byte & 0xF0) === 0xE0) {
                $n = 2; // 1110bbbb
            } elseif (($byte & 0xF8) === 0xF0) {
                $n = 3; // 11110bbb
            } elseif (($byte & 0xFC) === 0xF8) {
                $n = 4; // 111110bb
            } elseif (($byte & 0xFE) === 0xFC) {
                $n = 5; // 1111110b
            } else {
                return false; // Does not match any model
            }

            // The next $n bytes must exist and must all look like 10bbbbbb.
            for ($j = 0; $j < $n; $j++) {
                if (++$i === $length || (ord($string[$i]) & 0xC0) !== 0x80) {
                    return false;
                }
            }
        }

        return true;
    }

    /**
     * Remove any illegal characters, accents, etc.
     *
     * Strings without any byte in the 0x80-0xFF range are returned untouched.
     * Input that passes seemsUtf8() is transliterated with
     * ACCENTED_CHARACTERS; anything else is treated as a single-byte encoding
     * and mapped byte by byte.
     *
     * @param string $string String to unaccent
     *
     * @return string Unaccented string
     */
    public function unaccent(string $string) : string
    {
        // preg_match() returns 0 for "no match" and false only on failure, so
        // compare against 1 to make the nothing-to-do fast path effective.
        if (preg_match('/[\x80-\xff]/', $string) !== 1) {
            return $string;
        }

        if ($this->seemsUtf8($string)) {
            return strtr($string, self::ACCENTED_CHARACTERS);
        }

        // Assume ISO-8859-1 if not UTF-8
        $singleIn = chr(128) . chr(131) . chr(138) . chr(142) . chr(154)
            . chr(158) . chr(159) . chr(162) . chr(165) . chr(181)
            . chr(192) . chr(193) . chr(194) . chr(195) . chr(196)
            . chr(197) . chr(199) . chr(200) . chr(201) . chr(202)
            . chr(203) . chr(204) . chr(205) . chr(206) . chr(207)
            . chr(209) . chr(210) . chr(211) . chr(212) . chr(213)
            . chr(214) . chr(216) . chr(217) . chr(218) . chr(219)
            . chr(220) . chr(221) . chr(224) . chr(225) . chr(226)
            . chr(227) . chr(228) . chr(229) . chr(231) . chr(232)
            . chr(233) . chr(234) . chr(235) . chr(236) . chr(237)
            . chr(238) . chr(239) . chr(241) . chr(242) . chr(243)
            . chr(244) . chr(245) . chr(246) . chr(248) . chr(249)
            . chr(250) . chr(251) . chr(252) . chr(253) . chr(255);

        // One replacement character per input byte above, in the same order.
        $singleOut = 'EfSZszYcYuAAAAAACEEEEIIIINOOOOOOUUUUYaaaaaaceeeeiiiinoooooouuuuyy';

        $string = strtr($string, $singleIn, $singleOut);

        // Bytes that expand to two ASCII letters.
        $doubleIn = [
            chr(140),
            chr(156),
            chr(198),
            chr(208),
            chr(222),
            chr(223),
            chr(230),
            chr(240),
            chr(254),
        ];

        $doubleOut = ['OE', 'oe', 'AE', 'DH', 'TH', 'ss', 'ae', 'dh', 'th'];

        return str_replace($doubleIn, $doubleOut, $string);
    }

    /**
     * Convert any passed string to a url friendly string.
     * Converts 'My first blog post' to 'my-first-blog-post'
     *
     * @param string $string String to urlize.
     *
     * @return string Urlized string.
     *
     * @throws RuntimeException When preg_replace() fails and returns null.
     */
    public function urlize(string $string) : string
    {
        // Remove all non url friendly characters with the unaccent function
        $unaccented = $this->unaccent($string);

        if (function_exists('mb_strtolower')) {
            $lowered = mb_strtolower($unaccented);
        } else {
            $lowered = strtolower($unaccented);
        }

        // Applied in order. NOTE(review): the input is already lowercased at
        // this point, so the two patterns that look for [A-Z] cannot match;
        // they are kept so the rule set stays exactly as it was.
        $replacements = [
            '/\W/'                    => ' ',
            '/([A-Z]+)([A-Z][a-z])/'  => '\1_\2',
            '/([a-z\d])([A-Z])/'      => '\1_\2',
            '/[^A-Z^a-z^0-9^\/]+/'    => '-',
        ];

        $urlized = $lowered;

        foreach ($replacements as $pattern => $replacement) {
            $replaced = preg_replace($pattern, $replacement, $urlized);

            if ($replaced === null) {
                throw new RuntimeException(sprintf(
                    'preg_replace returned null for value "%s"',
                    $urlized
                ));
            }

            $urlized = $replaced;
        }

        // Drop the hyphens left by leading or trailing separators.
        return trim($urlized, '-');
    }

    /**
     * Returns a word in singular form.
     *
     * @param string $word The word in plural form.
     *
     * @return string The word in singular form.
     */
    public function singularize(string $word) : string
    {
        return $this->singularizer->inflect($word);
    }

    /**
     * Returns a word in plural form.
     *
     * @param string $word The word in singular form.
     *
     * @return string The word in plural form.
     */
    public function pluralize(string $word) : string
    {
        return $this->pluralizer->inflect($word);
    }
}
505
|
|
|
|