1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace TheIconic\NameParser; |
4
|
|
|
|
5
|
|
|
use TheIconic\NameParser\Mapper\NicknameMapper; |
6
|
|
|
use TheIconic\NameParser\Mapper\SalutationMapper; |
7
|
|
|
use TheIconic\NameParser\Mapper\SuffixMapper; |
8
|
|
|
use TheIconic\NameParser\Mapper\InitialMapper; |
9
|
|
|
use TheIconic\NameParser\Mapper\LastnameMapper; |
10
|
|
|
use TheIconic\NameParser\Mapper\FirstnameMapper; |
11
|
|
|
use TheIconic\NameParser\Mapper\MiddlenameMapper; |
12
|
|
|
|
13
|
|
|
class Parser
{
    /**
     * Characters that normalize() collapses into a single space.
     *
     * @var string
     */
    protected $whitespace = " \r\n\t";

    /**
     * Mappers applied in order by parse(); each one is called as map(array $parts).
     * Stays empty until getMappers() lazily installs the default set.
     *
     * @var array
     */
    protected $mappers = [];

    /**
     * split full names into the following parts:
     * - prefix / salutation  (Mr., Mrs., etc)
     * - given name / first name
     * - middle initials
     * - surname / last name
     * - suffix (II, Phd, Jr, etc)
     *
     * A name containing at least one comma is handed to parseSplitName() using
     * its first three comma-separated segments (a missing third segment is
     * passed as ''); any further segments are ignored. A name without a comma
     * is split on single spaces and run through every configured mapper.
     *
     * @param string $name
     * @return Name
     */
    public function parse($name): Name
    {
        $name = $this->normalize($name);

        $segments = explode(',', $name);

        if (1 < count($segments)) {
            return $this->parseSplitName($segments[0], $segments[1], $segments[2] ?? '');
        }

        // From here on the name is known to contain no comma, so no further
        // comma handling is required.
        $parts = explode(' ', $name);

        foreach ($this->getMappers() as $mapper) {
            $parts = $mapper->map($parts);
        }

        return new Name($parts);
    }

    /**
     * handles split-parsing of comma-separated name parts
     *
     * Each segment is parsed by its own dedicated parser and the resulting
     * parts are merged, in segment order, into a single Name.
     *
     * @param string $first  - the name part before the first comma
     * @param string $second - the name part between the first and second comma
     * @param string $third  - the name part after the second comma ('' if absent)
     *
     * @return Name
     */
    protected function parseSplitName($first, $second, $third): Name
    {
        $parts = array_merge(
            $this->getFirstSegmentParser()->parse($first)->getParts(),
            $this->getSecondSegmentParser()->parse($second)->getParts(),
            $this->getThirdSegmentParser()->parse($third)->getParts()
        );

        return new Name($parts);
    }

    /**
     * build the parser used for the segment before the first comma
     *
     * NOTE(review): 'match_single' presumably lets LastnameMapper accept a
     * segment consisting of a single word - confirm against the mapper.
     *
     * @return Parser
     */
    protected function getFirstSegmentParser(): Parser
    {
        $parser = new Parser();
        $parser->setMappers([
            new SalutationMapper(),
            new SuffixMapper(),
            new LastnameMapper(['match_single' => true]),
            new FirstnameMapper(),
            new MiddlenameMapper(),
        ]);

        return $parser;
    }

    /**
     * build the parser used for the segment between the first and second comma
     *
     * @return Parser
     */
    protected function getSecondSegmentParser(): Parser
    {
        $parser = new Parser();
        $parser->setMappers([
            new SalutationMapper(),
            new SuffixMapper(['match_single' => true]),
            new NicknameMapper(),
            new InitialMapper(['match_last' => true]),
            new FirstnameMapper(),
            new MiddlenameMapper(),
        ]);

        return $parser;
    }

    /**
     * build the parser used for the segment after the second comma
     *
     * Only a SuffixMapper is installed for this segment.
     *
     * @return Parser
     */
    protected function getThirdSegmentParser(): Parser
    {
        $parser = new Parser();
        $parser->setMappers([
            new SuffixMapper(['match_single' => true]),
        ]);

        return $parser;
    }

    /**
     * get the mappers for this parser
     *
     * Installs the default mapper chain on first use when none has been set.
     *
     * @return array
     */
    public function getMappers(): array
    {
        if (empty($this->mappers)) {
            $this->setMappers([
                new NicknameMapper(),
                new SalutationMapper(),
                new SuffixMapper(),
                new InitialMapper(),
                new LastnameMapper(),
                new FirstnameMapper(),
                new MiddlenameMapper(),
            ]);
        }

        return $this->mappers;
    }

    /**
     * set the mappers for this parser
     *
     * @param array $mappers
     * @return Parser
     */
    public function setMappers(array $mappers): Parser
    {
        $this->mappers = $mappers;

        return $this;
    }

    /**
     * normalize the name
     *
     * Trims the name, then replaces every run of configured whitespace
     * characters with a single space.
     *
     * @param string $name
     * @return string
     */
    protected function normalize(string $name): string
    {
        $whitespace = $this->getWhitespace();

        $name = trim($name);

        // An empty character set would produce the invalid pattern '/[]+/';
        // with nothing to collapse, the trimmed name is already normalized.
        if ('' === $whitespace) {
            return $name;
        }

        // Pass the delimiter so a '/' in the whitespace set cannot terminate
        // the pattern early.
        return preg_replace('/[' . preg_quote($whitespace, '/') . ']+/', ' ', $name);
    }

    /**
     * get a string of characters that are supposed to be treated as whitespace
     *
     * @return string
     */
    public function getWhitespace(): string
    {
        return $this->whitespace;
    }

    /**
     * set the string of characters that are supposed to be treated as whitespace
     *
     * @param string $whitespace
     * @return Parser
     */
    public function setWhitespace($whitespace): Parser
    {
        $this->whitespace = $whitespace;

        return $this;
    }
}
196
|
|
|
|
This check looks for the bodies of `if` statements that have no statements or where all statements have been commented out. This may be the result of changes for debugging, or the code may simply be obsolete. These `if` bodies can be removed. If you have an empty `if` but statements in the `else` branch, consider inverting the condition. For example, an `if` with an empty body whose real work sits in its `else` branch could be turned into a single `if` on the negated condition with no `else` at all. This is much more concise to read.