<?php declare(strict_types=1);
/**
 * This file is part of the Phootwork package.
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 *
 * @license MIT License
 * @copyright Thomas Gossmann
 */

namespace phootwork\lang\inflector;

/**
 * Standard replacement English pluralizer class. Based on the links below.
 *
 * Converts English nouns between their singular and plural forms and reports
 * which of the two forms a word is in. All rules are suffix heuristics: every
 * pattern is matched case-insensitively against the END of the word, and the
 * first rule that matches wins, so the order of the rule tables is significant.
 *
 * @link http://kuwamoto.org/2007/12/17/improved-pluralizing-in-php-actionscript-and-ror/
 * @link http://blogs.msdn.com/dmitryr/archive/2007/01/11/simple-english-noun-pluralizer-in-c.aspx
 * @link http://api.cakephp.org/view_source/inflector/
 *
 * @author paul.hanssen
 * @author Cristiano Cinotti
 */
class Inflector implements InflectorInterface {
    /**
     * Suffix rules `singular suffix pattern => plural replacement`, tried in order.
     *
     * The first THREE entries are capture-group patterns; the constructor relies
     * on that position when it derives the reverse ($singular) table.
     *
     * @var array
     */
    protected $plural = [
        '(ind|vert)ex' => '\1ices',
        '(alumn|bacill|cact|foc|fung|nucle|radi|stimul|syllab|termin|vir)us' => '\1i',
        '(buffal|tomat)o' => '\1oes',

        'x' => 'xes',
        'ch' => 'ches',
        'sh' => 'shes',
        'ss' => 'sses',

        'ay' => 'ays',
        'ey' => 'eys',
        'iy' => 'iys',
        'oy' => 'oys',
        'uy' => 'uys',
        'y' => 'ies',

        'ao' => 'aos',
        'eo' => 'eos',
        'io' => 'ios',
        'oo' => 'oos',
        'uo' => 'uos',
        'o' => 'os',

        'us' => 'uses',

        'cis' => 'ces',
        'sis' => 'ses',
        'xis' => 'xes',

        'zoon' => 'zoa',

        'itis' => 'itis',
        'ois' => 'ois',
        'pox' => 'pox',
        'ox' => 'oxes',

        'foot' => 'feet',
        'goose' => 'geese',
        'tooth' => 'teeth',
        'quiz' => 'quizzes',
        'alias' => 'aliases',

        'alf' => 'alves',
        'elf' => 'elves',
        'olf' => 'olves',
        'arf' => 'arves',
        'nife' => 'nives',
        'life' => 'lives',
    ];

    /**
     * Irregular words `singular => plural`. These take precedence over the
     * suffix rules and keep a leading upper-case letter of the input.
     *
     * @var array
     */
    protected $irregular = [
        'matrix' => 'matrices',
        'leaf' => 'leaves',
        'loaf' => 'loaves',
        'move' => 'moves',
        'foot' => 'feet',
        'goose' => 'geese',
        'genus' => 'genera',
        'sex' => 'sexes',
        'ox' => 'oxen',
        'child' => 'children',
        'man' => 'men',
        'tooth' => 'teeth',
        'person' => 'people',
        'wife' => 'wives',
        'mythos' => 'mythoi',
        'testis' => 'testes',
        'numen' => 'numina',
        'quiz' => 'quizzes',
        'alias' => 'aliases',
    ];

    /**
     * Words (lower case) that are returned unchanged by both conversions and
     * are reported as both singular and plural.
     *
     * @var array
     */
    protected $uncountable = [
        'sheep',
        'fish',
        'deer',
        'series',
        'species',
        'money',
        'rice',
        'information',
        'equipment',
        'news',
        'people',
    ];

    /**
     * Suffix rules `plural suffix pattern => singular replacement`.
     * Built by the constructor from $plural.
     *
     * @var array
     */
    protected $singular;

    /**
     * Array of words that could be ambiguously interpreted. Eg:
     * `isPlural` method can't recognize 'menus' as plural, because it considers 'menus' as the
     * singular of 'menuses'.
     *
     * Format: `singular => plural`; only the plural forms (the values) are matched.
     *
     * @var array
     */
    protected $ambiguous = [
        'menu' => 'menus',
    ];

    /**
     * Builds the plural-to-singular suffix table from the $plural rules.
     */
    public function __construct() {
        // Flip the plain suffix rules. The three leading capture-group rules of
        // $plural cannot be reversed by a flip, so they are dropped here ...
        $this->singular = array_slice(array_flip($this->plural), 3);

        // ... and replaced by hand-written reverse patterns, which must stay in
        // front so that they are tried before the generic suffixes.
        $reg = [
            '(ind|vert)ices' => '\1ex',
            '(alumn|bacill|cact|foc|fung|nucle|radi|stimul|syllab|termin|vir)i' => '\1us',
            '(buffal|tomat)oes' => '\1o',
        ];

        $this->singular = array_merge($reg, $this->singular);

        // We have an ambiguity: -xes is the plural form of -x or -xis. By now, we choose -x. Words with -xis suffix
        // should be added to the $ambiguous array.
        $this->singular['xes'] = 'x';
    }

    /**
     * Generate a plural name based on the passed in root.
     *
     * Uncountable words and known ambiguous plurals are returned unchanged.
     * Otherwise the irregular words are tried first, then the suffix rules;
     * if nothing matches and the word does not already look plural, an 's'
     * is appended.
     *
     * @param string $root The root that needs to be pluralized (e.g. Author)
     *
     * @return string The plural form of $root (e.g. Authors).
     */
    public function getPluralForm(string $root): string {
        if (in_array(strtolower($root), $this->uncountable, true)) {
            return $root;
        }

        // This check must be run before `checkIrregularForm` call
        if ($this->isAmbiguousPlural($root)) {
            return $root;
        }

        $replacement = $this->checkIrregularForm($root, $this->irregular);
        if (null !== $replacement) {
            return $replacement;
        }

        $replacement = $this->checkIrregularSuffix($root, $this->plural);
        if (null !== $replacement) {
            return $replacement;
        }

        // fallback to naive pluralization
        return $this->isPlural($root) ? $root : $root . 's';
    }

    /**
     * Generate a singular name based on the passed in root.
     *
     * Uncountable words are returned unchanged. Otherwise the irregular words
     * are tried first, then the suffix rules; if nothing matches and the word
     * does not already look singular, its last character is removed.
     *
     * @param string $root The root that needs to be singularized (e.g. Authors)
     *
     * @return string The singular form of $root (e.g. Author).
     */
    public function getSingularForm(string $root): string {
        if (in_array(strtolower($root), $this->uncountable, true)) {
            return $root;
        }

        // The irregular table is stored singular => plural; flip it to look up plurals.
        $replacement = $this->checkIrregularForm($root, array_flip($this->irregular));
        if (null !== $replacement) {
            return $replacement;
        }

        $replacement = $this->checkIrregularSuffix($root, $this->singular);
        if (null !== $replacement) {
            return $replacement;
        }

        // fallback to naive singularization
        return $this->isSingular($root) ? $root : substr($root, 0, -1);
    }

    /**
     * Check if $root word is plural.
     *
     * The empty string is not plural; uncountable words are. Any other word is
     * plural when it ends with an irregular plural, with a known plural suffix,
     * or with a lower-case 's'.
     *
     * @param string $root
     *
     * @return bool
     */
    public function isPlural(string $root): bool {
        if ('' === $root) {
            return false;
        }

        if (in_array(strtolower($root), $this->uncountable, true)) {
            return true;
        }

        // isIrregular() iterates the array VALUES: here the irregular plural forms.
        if ($this->isIrregular($this->irregular, $root)) {
            return true;
        }

        // The keys of $singular are the plural suffix patterns.
        if ($this->isIrregular(array_keys($this->singular), $root)) {
            return true;
        }

        return 's' === $root[strlen($root) - 1];
    }

    /**
     * Check if $root word is singular.
     *
     * The empty string and uncountable words are singular; known ambiguous
     * plurals are not. Any other word is singular when it ends with an
     * irregular singular, with a known singular suffix, or with anything but
     * a lower-case 's'.
     *
     * @param string $root
     *
     * @return bool
     */
    public function isSingular(string $root): bool {
        if ('' === $root) {
            return true;
        }

        if (in_array(strtolower($root), $this->uncountable, true)) {
            return true;
        }

        if ($this->isAmbiguousPlural($root)) {
            return false;
        }

        // The singular forms are the KEYS of $irregular. Passing the array itself
        // would test the plural forms (its values) and classify e.g. 'leaves' as
        // singular while rejecting 'mythos' or 'testis'.
        if ($this->isIrregular(array_keys($this->irregular), $root)) {
            return true;
        }

        // The keys of $plural are the singular suffix patterns.
        if ($this->isIrregular(array_keys($this->plural), $root)) {
            return true;
        }

        return 's' !== $root[strlen($root) - 1];
    }

    /**
     * Pluralize/Singularize irregular forms.
     *
     * @param string $root The string to pluralize/singularize
     * @param array $irregular Array of irregular forms (`pattern => replacement`)
     *
     * @return null|string The converted word, or null when no entry changes $root
     */
    private function checkIrregularForm(string $root, array $irregular): ?string {
        foreach ($irregular as $pattern => $result) {
            $searchPattern = '/' . $pattern . '$/i';
            $replacement = preg_replace($searchPattern, $result, $root);

            if ($root === $replacement) {
                continue;
            }

            // look at the first char and see if it's upper case
            // I know it won't handle more than one upper case char here (but I'm OK with that)
            if (preg_match('/^[A-Z]/', $root)) {
                $replacement = ucfirst($replacement);
            }

            return $replacement;
        }

        return null;
    }

    /**
     * Apply the first suffix rule that changes $root.
     *
     * @param string $root
     * @param array $irregular Array of irregular suffixes (`pattern => replacement`)
     *
     * @return null|string The converted word, or null when no rule changes $root
     */
    private function checkIrregularSuffix(string $root, array $irregular): ?string {
        foreach ($irregular as $pattern => $result) {
            $searchPattern = '/' . $pattern . '$/i';
            $replacement = preg_replace($searchPattern, $result, $root);

            if ($root !== $replacement) {
                return $replacement;
            }
        }

        return null;
    }

    /**
     * Tell whether $root ends with one of the plural forms listed in $ambiguous.
     *
     * @param string $root
     *
     * @return bool
     */
    private function isAmbiguousPlural(string $root): bool {
        // Only the values (the plural forms) of $ambiguous are inspected.
        foreach ($this->ambiguous as $pattern) {
            if (preg_match('/' . $pattern . '$/i', $root)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Tell whether $root ends (case-insensitively) with one of the given patterns.
     *
     * @param array $irregular Patterns to test; only the array VALUES are used
     * @param string $root
     *
     * @return bool
     */
    private function isIrregular(array $irregular, string $root): bool {
        foreach ($irregular as $pattern) {
            if (preg_match('/' . $pattern . '$/i', $root)) {
                return true;
            }
        }

        return false;
    }
}
335
|
|
|
|