1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace TheIconic\NameParser; |
4
|
|
|
|
5
|
|
|
use TheIconic\NameParser\Language\English; |
6
|
|
|
use TheIconic\NameParser\Mapper\NicknameMapper; |
7
|
|
|
use TheIconic\NameParser\Mapper\SalutationMapper; |
8
|
|
|
use TheIconic\NameParser\Mapper\SuffixMapper; |
9
|
|
|
use TheIconic\NameParser\Mapper\InitialMapper; |
10
|
|
|
use TheIconic\NameParser\Mapper\LastnameMapper; |
11
|
|
|
use TheIconic\NameParser\Mapper\FirstnameMapper; |
12
|
|
|
use TheIconic\NameParser\Mapper\MiddlenameMapper; |
13
|
|
|
|
14
|
|
|
class Parser
{
    /**
     * characters that are collapsed into a single space by normalize()
     *
     * @var string
     */
    protected $whitespace = " \r\n\t";

    /**
     * mappers applied in order by parse(); filled lazily by getMappers()
     *
     * @var array
     */
    protected $mappers = [];

    /**
     * language definitions queried for salutations, suffixes and lastname prefixes
     *
     * @var array
     */
    protected $languages = [];

    /**
     * delimiters handed to the NicknameMapper
     *
     * @var array
     */
    protected $nicknameDelimiters = [];

    /**
     * @param array $languages language definitions to use; falls back to English when empty
     */
    public function __construct(array $languages = [])
    {
        if (empty($languages)) {
            $languages = [new English()];
        }

        $this->languages = $languages;
    }

    /**
     * split full names into the following parts:
     * - prefix / salutation (Mr., Mrs., etc)
     * - given name / first name
     * - middle initials
     * - surname / last name
     * - suffix (II, Phd, Jr, etc)
     *
     * names containing commas are split at the commas and every segment is
     * handled by its own segment parser; only the first three segments are used
     *
     * @param string $name
     * @return Name
     */
    public function parse($name): Name
    {
        $name = $this->normalize($name);

        $segments = explode(',', $name);

        if (1 < count($segments)) {
            return $this->parseSplitName($segments[0], $segments[1], $segments[2] ?? '');
        }

        $parts = explode(' ', $name);

        foreach ($this->getMappers() as $mapper) {
            $parts = $mapper->map($parts);
        }

        return new Name($parts);
    }

    /**
     * handles split-parsing of comma-separated name parts
     *
     * @param string $first - the name part before the first comma
     * @param string $second - the name part between the first and the second comma
     * @param string $third - the name part after the second comma (empty string if absent)
     *
     * @return Name
     */
    protected function parseSplitName($first, $second, $third): Name
    {
        $parts = array_merge(
            $this->getFirstSegmentParser()->parse($first)->getParts(),
            $this->getSecondSegmentParser()->parse($second)->getParts(),
            $this->getThirdSegmentParser()->parse($third)->getParts()
        );

        return new Name($parts);
    }

    /**
     * get the parser that handles the segment before the first comma
     *
     * @return Parser
     */
    protected function getFirstSegmentParser(): Parser
    {
        return $this->createSegmentParser([
            new SalutationMapper($this->getSalutations()),
            new SuffixMapper($this->getSuffixes()),
            new LastnameMapper($this->getPrefixes(), true),
            new FirstnameMapper(),
            new MiddlenameMapper(),
        ]);
    }

    /**
     * get the parser that handles the segment between the first and the second comma
     *
     * @return Parser
     */
    protected function getSecondSegmentParser(): Parser
    {
        return $this->createSegmentParser([
            new SalutationMapper($this->getSalutations()),
            new SuffixMapper($this->getSuffixes(), true),
            new NicknameMapper($this->getNicknameDelimiters()),
            new InitialMapper(true),
            new FirstnameMapper(),
            new MiddlenameMapper(),
        ]);
    }

    /**
     * get the parser that handles the segment after the second comma
     *
     * @return Parser
     */
    protected function getThirdSegmentParser(): Parser
    {
        return $this->createSegmentParser([
            new SuffixMapper($this->getSuffixes(), true),
        ]);
    }

    /**
     * create a parser for a single comma-separated segment
     *
     * the languages of this parser are passed on so that the sub-parser does
     * not have to instantiate a default language it never reads
     *
     * @param array $mappers the mappers the segment parser applies, in order
     * @return Parser
     */
    private function createSegmentParser(array $mappers): Parser
    {
        $parser = new self($this->languages);

        return $parser->setMappers($mappers);
    }

    /**
     * get the mappers for this parser
     *
     * installs the default mapper chain when no mappers have been set
     *
     * @return array
     */
    public function getMappers(): array
    {
        if (empty($this->mappers)) {
            $this->setMappers([
                new NicknameMapper($this->getNicknameDelimiters()),
                new SalutationMapper($this->getSalutations()),
                new SuffixMapper($this->getSuffixes()),
                new InitialMapper(),
                new LastnameMapper($this->getPrefixes()),
                new FirstnameMapper(),
                new MiddlenameMapper(),
            ]);
        }

        return $this->mappers;
    }

    /**
     * set the mappers for this parser
     *
     * @param array $mappers
     * @return Parser
     */
    public function setMappers(array $mappers): Parser
    {
        $this->mappers = $mappers;

        return $this;
    }

    /**
     * normalize the name
     *
     * every run of configured whitespace characters becomes a single space,
     * afterwards surrounding whitespace is trimmed
     *
     * @param string $name
     * @return string
     */
    protected function normalize(string $name): string
    {
        $whitespace = $this->getWhitespace();

        // an empty set would produce the invalid pattern "/[]+/"
        if ('' === $whitespace) {
            return trim($name);
        }

        // match whole code points when possible so that multibyte whitespace
        // characters cannot tear apart other multibyte letters; fall back to
        // byte-wise matching for input that is not valid UTF-8
        $modifier = ($this->isValidUtf8($whitespace) && $this->isValidUtf8($name)) ? 'u' : '';

        // the delimiter has to be quoted as well, otherwise a "/" in the
        // whitespace set terminates the pattern early
        $pattern = '/[' . preg_quote($whitespace, '/') . ']+/' . $modifier;

        $collapsed = preg_replace($pattern, ' ', $name);

        // trim last: custom whitespace at the edges has just been turned into spaces
        return trim($collapsed ?? $name);
    }

    /**
     * check whether the given string is valid UTF-8
     *
     * @param string $value
     * @return bool
     */
    private function isValidUtf8(string $value): bool
    {
        // matching the empty pattern in unicode mode fails on invalid UTF-8 subjects
        return 1 === preg_match('//u', $value);
    }

    /**
     * get a string of characters that are supposed to be treated as whitespace
     *
     * @return string
     */
    public function getWhitespace(): string
    {
        return $this->whitespace;
    }

    /**
     * set the string of characters that are supposed to be treated as whitespace
     *
     * @param string $whitespace
     * @return Parser
     */
    public function setWhitespace($whitespace): Parser
    {
        $this->whitespace = $whitespace;

        return $this;
    }

    /**
     * get the lastname prefixes of all configured languages
     *
     * @return array
     */
    protected function getPrefixes()
    {
        return $this->collectFromLanguages('getLastnamePrefixes');
    }

    /**
     * get the suffixes of all configured languages
     *
     * @return array
     */
    protected function getSuffixes()
    {
        return $this->collectFromLanguages('getSuffixes');
    }

    /**
     * get the salutations of all configured languages
     *
     * @return array
     */
    protected function getSalutations()
    {
        return $this->collectFromLanguages('getSalutations');
    }

    /**
     * call the given getter on every configured language and combine the results
     *
     * @param string $getter name of the language method to call
     * @return array
     */
    private function collectFromLanguages(string $getter): array
    {
        $collected = [];

        /** @var LanguageInterface $language */
        foreach ($this->languages as $language) {
            // array union: for duplicate keys the entry of the earlier language wins
            $collected += $language->{$getter}();
        }

        return $collected;
    }

    /**
     * @return array
     */
    public function getNicknameDelimiters(): array
    {
        return $this->nicknameDelimiters;
    }

    /**
     * @param array $nicknameDelimiters
     * @return Parser
     */
    public function setNicknameDelimiters(array $nicknameDelimiters): Parser
    {
        $this->nicknameDelimiters = $nicknameDelimiters;

        return $this;
    }
}
279
|
|
|
|
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.