1 | <?php |
||
17 | class Parser |
||
18 | { |
||
19 | /** |
||
20 | * @var string |
||
21 | */ |
||
22 | protected $whitespace = " \r\n\t"; |
||
23 | |||
24 | /** |
||
25 | * @var array |
||
26 | */ |
||
27 | protected $mappers = []; |
||
28 | |||
29 | /** |
||
30 | * @var array |
||
31 | */ |
||
32 | protected $languages = []; |
||
33 | |||
34 | /** |
||
35 | * @var array |
||
36 | */ |
||
37 | protected $nicknameDelimiters = []; |
||
38 | |||
39 | /** |
||
40 | * @var int |
||
41 | */ |
||
42 | protected $maxSalutationIndex = 0; |
||
43 | |||
44 | /** |
||
45 | * @var int |
||
46 | */ |
||
47 | protected $maxCombinedInitials = 2; |
||
48 | |||
49 | public function __construct(array $languages = []) |
||
57 | |||
58 | /** |
||
59 | * split full names into the following parts: |
||
60 | * - prefix / salutation (Mr., Mrs., etc) |
||
61 | * - given name / first name |
||
62 | * - middle initials |
||
63 | * - surname / last name |
||
64 | * - suffix (II, Phd, Jr, etc) |
||
65 | * - extension (Germany: nobility predicate is part of lastname) |
||
66 | * - title (Germany: academic titles are usually used as name parts between salutation and given name) |
||
67 | * - company (the string contains typical characteristics for a company name and is returned identically) |
||
68 | * |
||
69 | * @param string $name |
||
70 | * @return Name |
||
71 | */ |
||
72 | public function parse($name): Name |
||
95 | |||
96 | /** |
||
97 | * handles split-parsing of comma-separated name parts |
||
98 | * |
||
99 | * @param string $first - the name part left of the comma |
||
100 | * @param string $second - the name part right of the comma |
||
101 | * @param string $third |
||
102 | * @return Name |
||
103 | */ |
||
104 | protected function parseSplitName($first, $second, $third): Name |
||
114 | |||
/**
 * Build a dedicated parser for the first name segment.
 *
 * A new Parser is created with the constructor's default arguments; the
 * sample lists and the maximum salutation index handed to its mappers are
 * read from the current instance.
 *
 * @return Parser the newly created parser with its mappers set
 */
protected function getFirstSegmentParser(): Parser
{
    $segmentParser = new Parser();

    // Collected one by one; the resulting order is handed to setMappers() as-is.
    $mappers = [];
    $mappers[] = new ExtensionMapper($this->getSamples('Extensions'));
    $mappers[] = new MultipartMapper($this->getSamples('Titles'), 'title');
    $mappers[] = new MultipartMapper($this->getSamples('LastnamePrefixes'), 'prefix');
    $mappers[] = new SalutationMapper($this->getSamples('Salutations'), $this->getMaxSalutationIndex());
    $mappers[] = new SuffixMapper($this->getSamples('Suffixes'), false, 2);
    $mappers[] = new LastnameMapper($this->getSamples('LastnamePrefixes'), true);
    $mappers[] = new FirstnameMapper();
    $mappers[] = new MiddlenameMapper();

    $segmentParser->setMappers($mappers);

    return $segmentParser;
}
||
135 | |||
/**
 * Build a dedicated parser for the second name segment.
 *
 * A new Parser is created with the constructor's default arguments; the
 * sample lists, nickname delimiters, maximum salutation index and maximum
 * number of combined initials handed to its mappers are read from the
 * current instance.
 *
 * @return Parser the newly created parser with its mappers set
 */
protected function getSecondSegmentParser(): Parser
{
    $segmentParser = new Parser();

    // Collected one by one; the resulting order is handed to setMappers() as-is.
    $mappers = [];
    $mappers[] = new ExtensionMapper($this->getSamples('Extensions'));
    $mappers[] = new MultipartMapper($this->getSamples('Titles'), 'title');
    $mappers[] = new MultipartMapper($this->getSamples('LastnamePrefixes'), 'prefix');
    $mappers[] = new SalutationMapper($this->getSamples('Salutations'), $this->getMaxSalutationIndex());
    $mappers[] = new SuffixMapper($this->getSamples('Suffixes'), true, 1);
    $mappers[] = new NicknameMapper($this->getNicknameDelimiters());
    $mappers[] = new InitialMapper($this->getMaxCombinedInitials(), true);
    $mappers[] = new FirstnameMapper();
    $mappers[] = new MiddlenameMapper(true);

    $segmentParser->setMappers($mappers);

    return $segmentParser;
}
||
157 | |||
158 | protected function getThirdSegmentParser(): Parser |
||
168 | |||
169 | /** |
||
170 | * get the mappers for this parser |
||
171 | * |
||
172 | * @return array |
||
173 | */ |
||
174 | public function getMappers(): array |
||
193 | |||
194 | /** |
||
195 | * get name as company if parts match company identifiers |
||
196 | * |
||
197 | * @param string $name |
||
198 | * @return array |
||
199 | */ |
||
200 | protected function getCompany(string $name): array |
||
206 | |||
207 | /** |
||
208 | * set the mappers for this parser |
||
209 | * |
||
210 | * @param array $mappers |
||
211 | * @return Parser |
||
212 | */ |
||
213 | public function setMappers(array $mappers): Parser |
||
219 | |||
220 | /** |
||
221 | * normalize the name |
||
222 | * |
||
223 | * @param string $name |
||
224 | * @return string |
||
225 | */ |
||
226 | protected function normalize(string $name): string |
||
234 | |||
235 | /** |
||
236 | * get a string of characters that are supposed to be treated as whitespace |
||
237 | * |
||
238 | * @return string |
||
239 | */ |
||
240 | public function getWhitespace(): string |
||
244 | |||
245 | /** |
||
246 | * set the string of characters that are supposed to be treated as whitespace |
||
247 | * |
||
248 | * @param string $whitespace |
||
249 | * @return Parser |
||
250 | */ |
||
251 | public function setWhitespace($whitespace): Parser |
||
257 | |||
258 | /** |
||
259 | * @return array |
||
260 | */ |
||
261 | protected function getSamples(string $sampleName): array |
||
271 | |||
272 | /** |
||
273 | * @return array |
||
274 | */ |
||
275 | public function getNicknameDelimiters(): array |
||
279 | |||
280 | /** |
||
281 | * @param array $nicknameDelimiters |
||
282 | * @return Parser |
||
283 | */ |
||
284 | public function setNicknameDelimiters(array $nicknameDelimiters): Parser |
||
290 | |||
291 | /** |
||
292 | * @return int |
||
293 | */ |
||
294 | public function getMaxSalutationIndex(): int |
||
298 | |||
299 | /** |
||
300 | * @param int $maxSalutationIndex |
||
301 | * @return Parser |
||
302 | */ |
||
303 | public function setMaxSalutationIndex(int $maxSalutationIndex): Parser |
||
309 | |||
310 | /** |
||
311 | * @return int |
||
312 | */ |
||
313 | public function getMaxCombinedInitials(): int |
||
317 | |||
318 | /** |
||
319 | * @param int $maxCombinedInitials |
||
320 | * @return Parser |
||
321 | */ |
||
322 | public function setMaxCombinedInitials(int $maxCombinedInitials): Parser |
||
328 | } |
||
329 |