Completed
Pull Request — master (#40)
by
unknown
01:17
created

Parser::setMaxCombinedInitials()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 6

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 6
rs 10
c 0
b 0
f 0
cc 1
nc 1
nop 1
1
<?php
2
3
namespace TheIconic\NameParser;
4
5
use TheIconic\NameParser\Language\English;
6
use TheIconic\NameParser\Mapper\NicknameMapper;
7
use TheIconic\NameParser\Mapper\SalutationMapper;
8
use TheIconic\NameParser\Mapper\SuffixMapper;
9
use TheIconic\NameParser\Mapper\InitialMapper;
10
use TheIconic\NameParser\Mapper\LastnameMapper;
11
use TheIconic\NameParser\Mapper\FirstnameMapper;
12
use TheIconic\NameParser\Mapper\MiddlenameMapper;
13
use TheIconic\NameParser\Mapper\CompanyMapper;
14
use TheIconic\NameParser\Mapper\ExtensionMapper;
15
use TheIconic\NameParser\Mapper\MultipartMapper;
16
17
class Parser
{
    /**
     * characters that normalize() collapses into a single space
     *
     * @var string
     */
    protected $whitespace = " \r\n\t";

    /**
     * mappers applied in order by parse(); built lazily by getMappers() when empty
     *
     * @var array
     */
    protected $mappers = [];

    /**
     * language objects that getSamples() queries for sample lists
     *
     * @var array
     */
    protected $languages = [];

    /**
     * delimiters handed to the NicknameMapper
     *
     * @var array
     */
    protected $nicknameDelimiters = [];

    /**
     * value handed to the SalutationMapper
     *
     * @var int
     */
    protected $maxSalutationIndex = 0;

    /**
     * value handed to the InitialMapper
     *
     * @var int
     */
    protected $maxCombinedInitials = 2;

    /**
     * @param array $languages language objects to draw samples from; defaults to English when empty
     */
    public function __construct(array $languages = [])
    {
        if (empty($languages)) {
            $languages = [new English()];
        }

        $this->languages = $languages;
    }

    /**
     * split full names into the following parts:
     * - prefix / salutation  (Mr., Mrs., etc)
     * - given name / first name
     * - middle initials
     * - surname / last name
     * - suffix (II, Phd, Jr, etc)
     * - extension (Germany: nobility predicate is part of lastname)
     * - title (Germany: academic titles are usually used as name parts between salutation and given name)
     * - company (the string contains typical characteristics for a company name and is returned identically)
     *
     * A name containing at least one comma is parsed segment by segment
     * (only the first three segments are used). Otherwise the name is first
     * checked against the company samples and, if that yields nothing, is
     * split on spaces and passed through every mapper in order.
     *
     * @param string $name
     * @return Name
     */
    public function parse($name): Name
    {
        $name = $this->normalize($name);

        $segments = explode(',', $name);

        if (count($segments) > 1) {
            return $this->parseSplitName($segments[0], $segments[1], $segments[2] ?? '');
        }

        $company = $this->getCompany($name);
        if (count($company) > 0) {
            return new Name($company);
        }

        $parts = explode(' ', $name);

        foreach ($this->getMappers() as $mapper) {
            $parts = $mapper->map($parts);
        }

        return new Name($parts);
    }

    /**
     * handles split-parsing of comma-separated name parts
     *
     * Each segment is parsed by its own sub-parser and the resulting parts
     * are merged in segment order.
     *
     * @param string $first - the name part left of the comma
     * @param string $second - the name part right of the comma
     * @param string $third - the name part after a second comma (empty string when absent)
     * @return Name
     */
    protected function parseSplitName($first, $second, $third): Name
    {
        $parts = array_merge(
            $this->getFirstSegmentParser()->parse($first)->getParts(),
            $this->getSecondSegmentParser()->parse($second)->getParts(),
            $this->getThirdSegmentParser()->parse($third)->getParts()
        );

        return new Name($parts);
    }

    /**
     * build the parser used for the segment left of the first comma
     *
     * @return Parser
     */
    protected function getFirstSegmentParser(): Parser
    {
        return $this->createSegmentParser([
            new ExtensionMapper($this->getSamples('Extensions')),
            new MultipartMapper($this->getSamples('Titles'), 'title'),
            new MultipartMapper($this->getSamples('LastnamePrefixes'), 'prefix'),
            new SalutationMapper($this->getSamples('Salutations'), $this->getMaxSalutationIndex()),
            new SuffixMapper($this->getSamples('Suffixes'), false, 2),
            new LastnameMapper($this->getSamples('LastnamePrefixes'), true),
            new FirstnameMapper(),
            new MiddlenameMapper(),
        ]);
    }

    /**
     * build the parser used for the segment right of the first comma
     *
     * @return Parser
     */
    protected function getSecondSegmentParser(): Parser
    {
        return $this->createSegmentParser([
            new ExtensionMapper($this->getSamples('Extensions')),
            new MultipartMapper($this->getSamples('Titles'), 'title'),
            new MultipartMapper($this->getSamples('LastnamePrefixes'), 'prefix'),
            new SalutationMapper($this->getSamples('Salutations'), $this->getMaxSalutationIndex()),
            new SuffixMapper($this->getSamples('Suffixes'), true, 1),
            new NicknameMapper($this->getNicknameDelimiters()),
            new InitialMapper($this->getMaxCombinedInitials(), true),
            new FirstnameMapper(),
            new MiddlenameMapper(true),
        ]);
    }

    /**
     * build the parser used for the segment after the second comma
     *
     * Only a suffix mapper is installed for this segment.
     *
     * @return Parser
     */
    protected function getThirdSegmentParser(): Parser
    {
        return $this->createSegmentParser([
            new SuffixMapper($this->getSamples('Suffixes'), true, 0),
        ]);
    }

    /**
     * create a sub-parser for one comma-separated segment
     *
     * The sub-parser receives this parser's languages so that the company
     * check performed inside its parse() call uses the same samples as the
     * parent instead of silently falling back to the English default.
     *
     * @param array $mappers mappers the sub-parser should apply
     * @return Parser
     */
    private function createSegmentParser(array $mappers): Parser
    {
        $parser = new Parser($this->languages);

        return $parser->setMappers($mappers);
    }

    /**
     * get the mappers for this parser
     *
     * When no mappers have been set, the default chain is created and
     * stored on first access.
     *
     * @return array
     */
    public function getMappers(): array
    {
        if (empty($this->mappers)) {
            $this->setMappers([
                new ExtensionMapper($this->getSamples('Extensions')),
                new MultipartMapper($this->getSamples('Titles'), 'title'),
                new MultipartMapper($this->getSamples('LastnamePrefixes'), 'prefix'),
                new NicknameMapper($this->getNicknameDelimiters()),
                new SalutationMapper($this->getSamples('Salutations'), $this->getMaxSalutationIndex()),
                new SuffixMapper($this->getSamples('Suffixes')),
                new InitialMapper($this->getMaxCombinedInitials()),
                new LastnameMapper($this->getSamples('LastnamePrefixes')),
                new FirstnameMapper(),
                new MiddlenameMapper(),
            ]);
        }

        return $this->mappers;
    }

    /**
     * get name as company if parts matches company identifiers
     *
     * @param string $name
     * @return array result of the company mapper; parse() treats an empty array as "not a company"
     */
    protected function getCompany(string $name): array
    {
        $mapper = new CompanyMapper($this->getSamples('Companies'));

        return $mapper->map([$name]);
    }

    /**
     * set the mappers for this parser
     *
     * @param array $mappers
     * @return Parser
     */
    public function setMappers(array $mappers): Parser
    {
        $this->mappers = $mappers;

        return $this;
    }

    /**
     * normalize the name
     *
     * Trims the name and replaces every run of configured whitespace
     * characters with a single space.
     *
     * @param string $name
     * @return string
     */
    protected function normalize(string $name): string
    {
        $whitespace = $this->getWhitespace();

        $name = trim($name);

        // an empty set would produce the invalid pattern "/[]+/"
        if ($whitespace === '') {
            return $name;
        }

        // quote with the delimiter so a configured "/" cannot terminate the pattern early
        return preg_replace('/[' . preg_quote($whitespace, '/') . ']+/', ' ', $name);
    }

    /**
     * get a string of characters that are supposed to be treated as whitespace
     *
     * @return string
     */
    public function getWhitespace(): string
    {
        return $this->whitespace;
    }

    /**
     * set the string of characters that are supposed to be treated as whitespace
     *
     * @param string $whitespace
     * @return Parser
     */
    public function setWhitespace($whitespace): Parser
    {
        $this->whitespace = $whitespace;

        return $this;
    }

    /**
     * collect the samples of one kind from all configured languages
     *
     * Calls "get{$sampleName}()" on every language and combines the results
     * with the array union operator, so for duplicate keys the entry of the
     * language listed first is kept.
     *
     * @param string $sampleName suffix of the getter to call, e.g. 'Suffixes'
     * @return array
     */
    protected function getSamples(string $sampleName): array
    {
        $samples = [];
        $method = sprintf('get%s', $sampleName);

        foreach ($this->languages as $language) {
            $samples += $language->{$method}();
        }

        return $samples;
    }

    /**
     * @return array
     */
    public function getNicknameDelimiters(): array
    {
        return $this->nicknameDelimiters;
    }

    /**
     * @param array $nicknameDelimiters
     * @return Parser
     */
    public function setNicknameDelimiters(array $nicknameDelimiters): Parser
    {
        $this->nicknameDelimiters = $nicknameDelimiters;

        return $this;
    }

    /**
     * @return int
     */
    public function getMaxSalutationIndex(): int
    {
        return $this->maxSalutationIndex;
    }

    /**
     * @param int $maxSalutationIndex
     * @return Parser
     */
    public function setMaxSalutationIndex(int $maxSalutationIndex): Parser
    {
        $this->maxSalutationIndex = $maxSalutationIndex;

        return $this;
    }

    /**
     * @return int
     */
    public function getMaxCombinedInitials(): int
    {
        return $this->maxCombinedInitials;
    }

    /**
     * @param int $maxCombinedInitials
     * @return Parser
     */
    public function setMaxCombinedInitials(int $maxCombinedInitials): Parser
    {
        $this->maxCombinedInitials = $maxCombinedInitials;

        return $this;
    }
}
329