Completed
Pull Request — master (#40)
by
unknown
01:11
created

Parser::getCompanies()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 11

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 11
rs 9.9
c 0
b 0
f 0
cc 2
nc 2
nop 0
1
<?php
2
3
namespace TheIconic\NameParser;
4
5
use TheIconic\NameParser\Language\English;
6
use TheIconic\NameParser\Mapper\NicknameMapper;
7
use TheIconic\NameParser\Mapper\SalutationMapper;
8
use TheIconic\NameParser\Mapper\SuffixMapper;
9
use TheIconic\NameParser\Mapper\InitialMapper;
10
use TheIconic\NameParser\Mapper\LastnameMapper;
11
use TheIconic\NameParser\Mapper\FirstnameMapper;
12
use TheIconic\NameParser\Mapper\MiddlenameMapper;
13
use TheIconic\NameParser\Mapper\CompanyMapper;
14
use TheIconic\NameParser\Mapper\ExtensionMapper;
15
use TheIconic\NameParser\Mapper\MultipartMapper;
16
17
class Parser
{
    /**
     * Characters that normalize() collapses into a single space.
     *
     * @var string
     */
    protected $whitespace = " \r\n\t";

    /**
     * Mappers applied, in order, by parse(). Filled lazily by getMappers()
     * unless set explicitly through setMappers().
     *
     * @var array
     */
    protected $mappers = [];

    /**
     * Language definitions that supply the vocabularies (prefixes, suffixes,
     * salutations, extensions, titles, companies) handed to the mappers.
     *
     * @var array
     */
    protected $languages = [];

    /**
     * Delimiters passed to the NicknameMapper.
     *
     * @var array
     */
    protected $nicknameDelimiters = [];

    /**
     * Value passed as second argument to the SalutationMapper.
     *
     * @var int
     */
    protected $maxSalutationIndex = 0;

    /**
     * Value passed as first argument to the InitialMapper.
     *
     * @var int
     */
    protected $maxCombinedInitials = 2;

    /**
     * @param array $languages language definitions to use; falls back to
     *                         a single English instance when empty
     */
    public function __construct(array $languages = [])
    {
        if (empty($languages)) {
            $languages = [new English()];
        }

        $this->languages = $languages;
    }

    /**
     * split full names into the following parts:
     * - prefix / salutation  (Mr., Mrs., etc)
     * - given name / first name
     * - middle initials
     * - surname / last name
     * - suffix (II, Phd, Jr, etc)
     * - extension (Germany: nobility predicate is part of lastname)
     * - title (Germany: academic titles are usually used as name parts between salutation and given name)
     * - company (the string contains typical characteristics for a company name and is returned identically)
     *
     * Names containing a comma are delegated to parseSplitName() (only the
     * first three comma-separated segments are used). Otherwise the name is
     * first offered to the company mapper and, if that yields nothing, split
     * on spaces and run through every mapper from getMappers().
     *
     * @param string $name
     * @return Name
     */
    public function parse($name): Name
    {
        $name = $this->normalize($name);

        $segments = explode(',', $name);

        if (1 < count($segments)) {
            return $this->parseSplitName($segments[0], $segments[1], $segments[2] ?? '');
        }

        $mapped = $this->getCompany($name);
        if (count($mapped)) {
            return new Name($mapped);
        }

        $parts = explode(' ', $name);

        foreach ($this->getMappers() as $mapper) {
            $parts = $mapper->map($parts);
        }

        return new Name($parts);
    }

    /**
     * handles split-parsing of comma-separated name parts
     *
     * Each segment is parsed by its own dedicated sub-parser and the
     * resulting parts are concatenated in segment order.
     *
     * @param string $first - the name part left of the comma
     * @param string $second - the name part right of the comma
     * @param string $third - the part after a second comma ('' when absent)
     * @return Name
     */
    protected function parseSplitName($first, $second, $third): Name
    {
        $parts = array_merge(
            $this->getFirstSegmentParser()->parse($first)->getParts(),
            $this->getSecondSegmentParser()->parse($second)->getParts(),
            $this->getThirdSegmentParser()->parse($third)->getParts()
        );

        return new Name($parts);
    }

    /**
     * build the sub-parser used for the segment left of the first comma
     *
     * NOTE(review): the sub-parser is constructed with the default languages;
     * its mappers receive this instance's vocabularies, but the company lookup
     * inside its own parse() call uses the sub-parser's defaults - confirm
     * this is intended.
     *
     * @return Parser
     */
    protected function getFirstSegmentParser(): Parser
    {
        $parser = new Parser();

        $parser->setMappers([
            new ExtensionMapper($this->getExtensions()),
            new MultipartMapper($this->getTitles(), 'title'),
            new MultipartMapper($this->getPrefixes(), 'prefix'),
            new SalutationMapper($this->getSalutations(), $this->getMaxSalutationIndex()),
            new SuffixMapper($this->getSuffixes(), false, 2),
            new LastnameMapper($this->getPrefixes(), true),
            new FirstnameMapper(),
            new MiddlenameMapper(),
        ]);

        return $parser;
    }

    /**
     * build the sub-parser used for the segment right of the first comma
     *
     * NOTE(review): same default-language caveat as the first segment parser.
     *
     * @return Parser
     */
    protected function getSecondSegmentParser(): Parser
    {
        $parser = new Parser();

        $parser->setMappers([
            new ExtensionMapper($this->getExtensions()),
            new MultipartMapper($this->getTitles(), 'title'),
            new MultipartMapper($this->getPrefixes(), 'prefix'),
            new SalutationMapper($this->getSalutations(), $this->getMaxSalutationIndex()),
            new SuffixMapper($this->getSuffixes(), true, 1),
            new NicknameMapper($this->getNicknameDelimiters()),
            new InitialMapper($this->getMaxCombinedInitials(), true),
            new FirstnameMapper(),
            new MiddlenameMapper(true),
        ]);

        return $parser;
    }

    /**
     * build the sub-parser used for the segment after a second comma;
     * it only carries a SuffixMapper
     *
     * @return Parser
     */
    protected function getThirdSegmentParser(): Parser
    {
        $parser = new Parser();

        $parser->setMappers([
            new SuffixMapper($this->getSuffixes(), true, 0),
        ]);

        return $parser;
    }

    /**
     * get the mappers for this parser
     *
     * When no mappers have been set, the default chain is created and stored
     * on first call, using the configuration in effect at that moment.
     *
     * @return array
     */
    public function getMappers(): array
    {
        if (empty($this->mappers)) {
            $this->setMappers([
                new ExtensionMapper($this->getExtensions()),
                new MultipartMapper($this->getTitles(), 'title'),
                new MultipartMapper($this->getPrefixes(), 'prefix'),
                new NicknameMapper($this->getNicknameDelimiters()),
                new SalutationMapper($this->getSalutations(), $this->getMaxSalutationIndex()),
                new SuffixMapper($this->getSuffixes()),
                new InitialMapper($this->getMaxCombinedInitials()),
                new LastnameMapper($this->getPrefixes()),
                new FirstnameMapper(),
                new MiddlenameMapper(),
            ]);
        }

        return $this->mappers;
    }

    /**
     * get name as company if parts matches company identifiers
     *
     * @param string $name
     * @return array result of the CompanyMapper for the whole name; parse()
     *               treats a non-empty result as a company match
     */
    protected function getCompany(string $name): array
    {
        $mapper = new CompanyMapper($this->getCompanies());

        return $mapper->map([$name]);
    }

    /**
     * set the mappers for this parser
     *
     * @param array $mappers
     * @return Parser
     */
    public function setMappers(array $mappers): Parser
    {
        $this->mappers = $mappers;

        return $this;
    }

    /**
     * normalize the name
     *
     * Trims the name and collapses every run of configured whitespace
     * characters into a single space.
     *
     * @param string $name
     * @return string
     */
    protected function normalize(string $name): string
    {
        $name = trim($name);
        $whitespace = $this->getWhitespace();

        // An empty set would produce the invalid pattern "/[]+/".
        if ($whitespace === '') {
            return $name;
        }

        // Pass the delimiter so a configured "/" cannot terminate the pattern early.
        $pattern = '/[' . preg_quote($whitespace, '/') . ']+/';
        $collapsed = preg_replace($pattern, ' ', $name);

        // preg_replace() yields null on failure; keep the declared string contract.
        if ($collapsed === null) {
            return $name;
        }

        // Custom whitespace characters at the edges are not covered by the
        // initial trim() and have just been turned into spaces - strip them so
        // that splitting on ' ' does not produce empty parts.
        return trim($collapsed, ' ');
    }

    /**
     * get a string of characters that are supposed to be treated as whitespace
     *
     * @return string
     */
    public function getWhitespace(): string
    {
        return $this->whitespace;
    }

    /**
     * set the string of characters that are supposed to be treated as whitespace
     *
     * @param string $whitespace
     * @return Parser
     */
    public function setWhitespace($whitespace): Parser
    {
        $this->whitespace = $whitespace;

        return $this;
    }

    /**
     * combined lastname prefixes of all configured languages
     *
     * @return array
     */
    protected function getPrefixes()
    {
        return $this->mergeFromLanguages(static function ($language) {
            return $language->getLastnamePrefixes();
        });
    }

    /**
     * combined suffixes of all configured languages
     *
     * @return array
     */
    protected function getSuffixes()
    {
        return $this->mergeFromLanguages(static function ($language) {
            return $language->getSuffixes();
        });
    }

    /**
     * combined salutations of all configured languages
     *
     * @return array
     */
    protected function getSalutations()
    {
        return $this->mergeFromLanguages(static function ($language) {
            return $language->getSalutations();
        });
    }

    /**
     * combined extensions of all configured languages
     *
     * @return array
     */
    protected function getExtensions()
    {
        return $this->mergeFromLanguages(static function ($language) {
            return $language->getExtensions();
        });
    }

    /**
     * combined titles of all configured languages
     *
     * @return array
     */
    protected function getTitles()
    {
        return $this->mergeFromLanguages(static function ($language) {
            return $language->getTitles();
        });
    }

    /**
     * combined company identifiers of all configured languages
     *
     * @return array
     */
    protected function getCompanies()
    {
        return $this->mergeFromLanguages(static function ($language) {
            return $language->getCompanies();
        });
    }

    /**
     * union the arrays that $extract returns for each configured language
     *
     * Uses the array union operator, so on key collisions the entry of the
     * language listed first is kept.
     *
     * @param callable $extract receives one language, returns its array
     * @return array
     */
    private function mergeFromLanguages(callable $extract): array
    {
        $merged = [];

        /** @var LanguageInterface $language */
        foreach ($this->languages as $language) {
            $merged += $extract($language);
        }

        return $merged;
    }

    /**
     * @return array
     */
    public function getNicknameDelimiters(): array
    {
        return $this->nicknameDelimiters;
    }

    /**
     * @param array $nicknameDelimiters
     * @return Parser
     */
    public function setNicknameDelimiters(array $nicknameDelimiters): Parser
    {
        $this->nicknameDelimiters = $nicknameDelimiters;

        return $this;
    }

    /**
     * @return int
     */
    public function getMaxSalutationIndex(): int
    {
        return $this->maxSalutationIndex;
    }

    /**
     * @param int $maxSalutationIndex
     * @return Parser
     */
    public function setMaxSalutationIndex(int $maxSalutationIndex): Parser
    {
        $this->maxSalutationIndex = $maxSalutationIndex;

        return $this;
    }

    /**
     * @return int
     */
    public function getMaxCombinedInitials(): int
    {
        return $this->maxCombinedInitials;
    }

    /**
     * @param int $maxCombinedInitials
     * @return Parser
     */
    public function setMaxCombinedInitials(int $maxCombinedInitials): Parser
    {
        $this->maxCombinedInitials = $maxCombinedInitials;

        return $this;
    }
}
405