1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace Egulias\EmailValidator\Parser; |
4
|
|
|
|
5
|
|
|
use Egulias\EmailValidator\EmailLexer; |
6
|
|
|
use Egulias\EmailValidator\Exception\CharNotAllowed; |
7
|
|
|
use Egulias\EmailValidator\Exception\CommaInDomain; |
8
|
|
|
use Egulias\EmailValidator\Exception\ConsecutiveAt; |
9
|
|
|
use Egulias\EmailValidator\Exception\CRLFAtTheEnd; |
10
|
|
|
use Egulias\EmailValidator\Exception\CRNoLF; |
11
|
|
|
use Egulias\EmailValidator\Exception\DomainHyphened; |
12
|
|
|
use Egulias\EmailValidator\Exception\DotAtEnd; |
13
|
|
|
use Egulias\EmailValidator\Exception\DotAtStart; |
14
|
|
|
use Egulias\EmailValidator\Exception\ExpectingATEXT; |
15
|
|
|
use Egulias\EmailValidator\Exception\ExpectingDomainLiteralClose; |
16
|
|
|
use Egulias\EmailValidator\Exception\ExpectingDTEXT; |
17
|
|
|
use Egulias\EmailValidator\Exception\NoDomainPart; |
18
|
|
|
use Egulias\EmailValidator\Exception\UnopenedComment; |
19
|
|
|
use Egulias\EmailValidator\Warning\AddressLiteral; |
20
|
|
|
use Egulias\EmailValidator\Warning\CFWSWithFWS; |
21
|
|
|
use Egulias\EmailValidator\Warning\DeprecatedComment; |
22
|
|
|
use Egulias\EmailValidator\Warning\DomainLiteral; |
23
|
|
|
use Egulias\EmailValidator\Warning\DomainTooLong; |
24
|
|
|
use Egulias\EmailValidator\Warning\IPV6BadChar; |
25
|
|
|
use Egulias\EmailValidator\Warning\IPV6ColonEnd; |
26
|
|
|
use Egulias\EmailValidator\Warning\IPV6ColonStart; |
27
|
|
|
use Egulias\EmailValidator\Warning\IPV6Deprecated; |
28
|
|
|
use Egulias\EmailValidator\Warning\IPV6DoubleColon; |
29
|
|
|
use Egulias\EmailValidator\Warning\IPV6GroupCount; |
30
|
|
|
use Egulias\EmailValidator\Warning\IPV6MaxGroups; |
31
|
|
|
use Egulias\EmailValidator\Warning\LabelTooLong; |
32
|
|
|
use Egulias\EmailValidator\Warning\ObsoleteDTEXT; |
33
|
|
|
use Egulias\EmailValidator\Warning\TLD; |
34
|
|
|
|
35
|
|
|
class DomainPart extends Parser |
36
|
|
|
{ |
37
|
|
|
const DOMAIN_MAX_LENGTH = 254; |
38
|
|
|
protected $domainPart = ''; |
39
|
|
|
|
40
|
92 |
|
/**
 * Parses the domain part of an address from the current lexer position.
 *
 * The lexer is advanced once, the start-of-domain checks run, and the
 * remaining tokens are consumed by doParseDomainPart(). The text it returns
 * is stored and exposed through getDomainPart().
 *
 * @param string $domainPart Not read by this method; parsing is driven
 *                           entirely by the lexer state.
 *
 * @throws DotAtEnd       when the last consumed token is a dot
 * @throws DomainHyphened when the last consumed token is a hyphen
 * @throws CRLFAtTheEnd   when the last consumed token is a CR
 */
public function parse($domainPart)
{
    $this->lexer->moveNext();
    $this->performDomainStartChecks();

    $parsedDomain = $this->doParseDomainPart();

    $lastToken = $this->lexer->getPrevious();
    $lastType = $lastToken['type'];

    if (EmailLexer::S_DOT === $lastType) {
        throw new DotAtEnd();
    }

    if (EmailLexer::S_HYPHEN === $lastType) {
        throw new DomainHyphened();
    }

    // An over-long domain only produces a warning; it is recorded before
    // the trailing-CR check so it is present even if that check throws.
    if (strlen($parsedDomain) > self::DOMAIN_MAX_LENGTH) {
        $this->warnings[DomainTooLong::CODE] = new DomainTooLong();
    }

    if (EmailLexer::S_CR === $lastType) {
        throw new CRLFAtTheEnd();
    }

    $this->domainPart = $parsedDomain;
}
65
|
|
|
|
66
|
92 |
|
/**
 * Validates the first token of the domain before the main parse loop runs.
 *
 * Rejects an invalid leading token and an empty domain, then, if the domain
 * opens with a parenthesis, records a DeprecatedComment warning and consumes
 * the leading comment.
 */
private function performDomainStartChecks()
{
    $this->checkInvalidTokensAfterAT();
    $this->checkEmptyDomain();

    if ($this->lexer->token['type'] !== EmailLexer::S_OPENPARENTHESIS) {
        return;
    }

    $this->warnings[DeprecatedComment::CODE] = new DeprecatedComment();
    $this->parseDomainComments();
}
76
|
|
|
|
77
|
91 |
|
/**
 * Ensures there is something to parse as a domain.
 *
 * The domain is considered missing when the current token is S_EMPTY, or
 * when it is a space that is not followed by a GENERIC token.
 *
 * @throws NoDomainPart when no domain is present
 */
private function checkEmptyDomain()
{
    $currentType = $this->lexer->token['type'];

    if (EmailLexer::S_EMPTY === $currentType) {
        throw new NoDomainPart();
    }

    if (EmailLexer::S_SP === $currentType && !$this->lexer->isNextToken(EmailLexer::GENERIC)) {
        throw new NoDomainPart();
    }
}
87
|
|
|
|
88
|
92 |
|
/**
 * Rejects a domain whose first token is a dot or a hyphen.
 *
 * @throws DotAtStart     when the current token is a dot
 * @throws DomainHyphened when the current token is a hyphen
 */
private function checkInvalidTokensAfterAT()
{
    $firstType = $this->lexer->token['type'];

    if (EmailLexer::S_DOT === $firstType) {
        throw new DotAtStart();
    }

    if (EmailLexer::S_HYPHEN === $firstType) {
        throw new DomainHyphened();
    }
}
97
|
|
|
|
98
|
52 |
|
/**
 * Returns the domain text stored by the last successful parse() call.
 *
 * @return string Empty string until parse() has completed without throwing.
 */
public function getDomainPart()
{
    return $this->domainPart;
}
102
|
|
|
|
103
|
7 |
|
/**
 * Inspects an IPv6 address literal and records warnings for anomalies.
 *
 * Nothing is thrown or returned; every finding is stored in $this->warnings.
 *
 * @param string $addressLiteral Literal text; its first five characters are
 *                               skipped before analysis (presumably the
 *                               "IPv6:" tag - confirm against callers).
 * @param int    $maxGroups      Number of colon-separated groups expected
 *                               when the address has no "::" shorthand.
 */
public function checkIPV6Tag($addressLiteral, $maxGroups = 8)
{
    $lastToken = $this->lexer->getPrevious();
    if (EmailLexer::S_COLON === $lastToken['type']) {
        $this->warnings[IPV6ColonEnd::CODE] = new IPV6ColonEnd();
    }

    // Daniel Marschall's new IPv6 testing strategy
    $address = substr($addressLiteral, 5);
    $groups = explode(':', $address);
    $groupTotal = count($groups);
    $firstDoubleColon = strpos($address, '::');

    // Every group must be zero to four hexadecimal digits.
    $badGroups = preg_grep('/^[0-9A-Fa-f]{0,4}$/', $groups, PREG_GREP_INVERT);
    if (count($badGroups) > 0) {
        $this->warnings[IPV6BadChar::CODE] = new IPV6BadChar();
    }

    if (false === $firstDoubleColon) {
        // Without "::" the group count has to match exactly.
        if ($groupTotal !== $maxGroups) {
            $this->warnings[IPV6GroupCount::CODE] = new IPV6GroupCount();
        }

        return;
    }

    // First and last occurrence differ, so "::" appears more than once.
    if (strrpos($address, '::') !== $firstDoubleColon) {
        $this->warnings[IPV6DoubleColon::CODE] = new IPV6DoubleColon();

        return;
    }

    // RFC 4291 allows :: at the start or end of an address
    // with 7 other groups in addition
    $atEdge = 0 === $firstDoubleColon || (strlen($address) - 2) === $firstDoubleColon;
    $allowedGroups = $atEdge ? $maxGroups + 1 : $maxGroups;

    if ($groupTotal > $allowedGroups) {
        $this->warnings[IPV6MaxGroups::CODE] = new IPV6MaxGroups();
    } elseif ($groupTotal === $allowedGroups) {
        $this->warnings[IPV6Deprecated::CODE] = new IPV6Deprecated();
    }
}
145
|
|
|
|
146
|
82 |
|
/**
 * Consumes the remaining lexer tokens and builds the domain text.
 *
 * For every token the per-token checks run in a fixed order: disallowed
 * characters, comment handling, consecutive dots, domain exceptions,
 * domain literal, label length, folding white space. The token value is
 * then appended and the lexer is advanced. The loop ends when the lexer
 * has no current token.
 *
 * @return string Concatenated values of the consumed tokens.
 *
 * @throws UnopenedComment when a closing parenthesis has no matching opener
 */
protected function doParseDomainPart()
{
    $parsed = '';
    $unclosedComments = 0;

    do {
        // Captured before any comment handling advances the lexer.
        $previousToken = $this->lexer->getPrevious();

        $this->checkNotAllowedChars($this->lexer->token);

        if (EmailLexer::S_OPENPARENTHESIS === $this->lexer->token['type']) {
            $this->parseComments();
            $unclosedComments += $this->getOpenedParenthesis();
            $this->lexer->moveNext();

            $afterComment = $this->lexer->getPrevious();
            if (EmailLexer::S_CLOSEPARENTHESIS === $afterComment['type']) {
                --$unclosedComments;
            }
        }

        if (EmailLexer::S_CLOSEPARENTHESIS === $this->lexer->token['type']) {
            if (0 === $unclosedComments) {
                throw new UnopenedComment();
            }

            --$unclosedComments;
        }

        $this->checkConsecutiveDots();
        $this->checkDomainPartExceptions($previousToken);

        if ($this->hasBrackets()) {
            $this->parseDomainLiteral();
        }

        $this->checkLabelLength($previousToken);

        if ($this->isFWS()) {
            $this->parseFWS();
        }

        $parsed .= $this->lexer->token['value'];
        $this->lexer->moveNext();
    } while ($this->lexer->token);

    return $parsed;
}
191
|
|
|
|
192
|
82 |
|
/**
 * Rejects tokens that may never appear in a domain.
 *
 * @param array $token Lexer token; only its 'type' entry is read.
 *
 * @throws CharNotAllowed when the token is a backslash or a slash
 */
private function checkNotAllowedChars($token)
{
    $forbiddenTypes = array_fill_keys(
        array(EmailLexer::S_BACKSLASH, EmailLexer::S_SLASH),
        true
    );

    if (isset($forbiddenTypes[$token['type']])) {
        throw new CharNotAllowed();
    }
}
199
|
|
|
|
200
|
13 |
|
/**
 * Entry point for parsing a bracketed domain literal.
 *
 * Records an IPV6ColonStart warning when the literal begins with a colon,
 * or when an IPv6 tag is immediately followed by a double colon, and then
 * delegates to doParseDomainLiteral().
 *
 * @return string|false Whatever doParseDomainLiteral() returns.
 */
protected function parseDomainLiteral()
{
    if ($this->lexer->isNextToken(EmailLexer::S_COLON)) {
        $this->warnings[IPV6ColonStart::CODE] = new IPV6ColonStart();
    }

    if ($this->lexer->isNextToken(EmailLexer::S_IPV6TAG)) {
        // Look one token further on a copy so the real lexer does not move.
        $lookaheadLexer = clone $this->lexer;
        $lookaheadLexer->moveNext();

        if ($lookaheadLexer->isNextToken(EmailLexer::S_DOUBLECOLON)) {
            $this->warnings[IPV6ColonStart::CODE] = new IPV6ColonStart();
        }
    }

    return $this->doParseDomainLiteral();
}
215
|
|
|
|
216
|
13 |
|
/**
 * Consumes the tokens of a domain literal and validates its content.
 *
 * Token values are accumulated from the current token until either a
 * S_CLOSEQBRACKET token is reached or moveNext() reports that nothing
 * follows. The accumulated text has its '[' characters removed, is passed
 * through checkIPV4Tag() and, when an IPv6 tag was seen, through
 * checkIPV6Tag(). Findings that are not fatal are stored in $this->warnings.
 *
 * @return string|false The literal text as returned by checkIPV4Tag(), or
 *                      false when checkIPV4Tag() returns false.
 *
 * @throws ExpectingDTEXT when the current token is NUL, or the next token
 *                        is S_OPENQBRACKET or S_OPENBRACKET
 * @throws CRNoLF         when the next token is a CR
 */
protected function doParseDomainLiteral()
{
    $IPv6TAG = false;
    $addressLiteral = '';

    do {
        $currentType = $this->lexer->token['type'];

        if ($currentType === EmailLexer::C_NUL) {
            throw new ExpectingDTEXT();
        }

        if ($currentType === EmailLexer::INVALID ||
            $currentType === EmailLexer::C_DEL ||
            $currentType === EmailLexer::S_LF
        ) {
            $this->warnings[ObsoleteDTEXT::CODE] = new ObsoleteDTEXT();
        }

        if ($this->lexer->isNextTokenAny(array(EmailLexer::S_OPENQBRACKET, EmailLexer::S_OPENBRACKET))) {
            throw new ExpectingDTEXT();
        }

        // The list must contain token types. The third entry used to be the
        // boolean result of comparing the current token with CRLF, so CRLF
        // itself was never part of the list and a CRLF lookahead never
        // reached the folding-white-space handling below.
        if ($this->lexer->isNextTokenAny(
            array(EmailLexer::S_HTAB, EmailLexer::S_SP, EmailLexer::CRLF)
        )) {
            $this->warnings[CFWSWithFWS::CODE] = new CFWSWithFWS();
            $this->parseFWS();
        }

        if ($this->lexer->isNextToken(EmailLexer::S_CR)) {
            throw new CRNoLF();
        }

        // Re-read the token type from here on: the backslash branch
        // advances the lexer.
        if ($this->lexer->token['type'] === EmailLexer::S_BACKSLASH) {
            $this->warnings[ObsoleteDTEXT::CODE] = new ObsoleteDTEXT();
            $addressLiteral .= $this->lexer->token['value'];
            $this->lexer->moveNext();
            $this->validateQuotedPair();
        }

        if ($this->lexer->token['type'] === EmailLexer::S_IPV6TAG) {
            $IPv6TAG = true;
        }

        if ($this->lexer->token['type'] === EmailLexer::S_CLOSEQBRACKET) {
            break;
        }

        $addressLiteral .= $this->lexer->token['value'];
    } while ($this->lexer->moveNext());

    $addressLiteral = str_replace('[', '', $addressLiteral);
    $addressLiteral = $this->checkIPV4Tag($addressLiteral);

    if (false === $addressLiteral) {
        return $addressLiteral;
    }

    if (!$IPv6TAG) {
        $this->warnings[DomainLiteral::CODE] = new DomainLiteral();

        return $addressLiteral;
    }

    $this->warnings[AddressLiteral::CODE] = new AddressLiteral();

    $this->checkIPV6Tag($addressLiteral);

    return $addressLiteral;
}
282
|
|
|
|
283
|
11 |
|
/**
 * Detects a dotted-quad IPv4 address at the end of an address literal.
 *
 * - No trailing IPv4 address: the literal is returned unchanged.
 * - The literal is nothing but the IPv4 address (match at offset 0): an
 *   AddressLiteral warning is recorded and false is returned.
 * - Otherwise the IPv4 tail is replaced by "0:0" so the remainder can be
 *   checked as IPv6 groups.
 *
 * @param string $addressLiteral Literal text to inspect.
 *
 * @return string|false The possibly rewritten literal, or false as above.
 */
protected function checkIPV4Tag($addressLiteral)
{
    $ipv4Match = array();

    // Extract IPv4 part from the end of the address-literal (if there is one)
    $endsWithIPv4 = preg_match(
        '/\\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$/',
        $addressLiteral,
        $ipv4Match
    ) > 0;

    if (!$endsWithIPv4) {
        return $addressLiteral;
    }

    $ipv4Offset = strrpos($addressLiteral, $ipv4Match[0]);
    if (0 === $ipv4Offset) {
        $this->warnings[AddressLiteral::CODE] = new AddressLiteral();

        return false;
    }

    // Convert IPv4 part to IPv6 format for further testing
    return substr($addressLiteral, 0, $ipv4Offset) . '0:0';
}
305
|
|
|
|
306
|
81 |
|
/**
 * Rejects tokens and token sequences that are invalid inside a domain.
 *
 * The checks run in a fixed order and the first violation throws.
 *
 * @param array $prev Token that preceded the current one; only its 'type'
 *                    entry is read, and only when the current token is
 *                    S_OPENQBRACKET.
 *
 * @throws ExpectingATEXT when the current token is a double quote,
 *                        semicolon, '>' or '<'; is S_OPENQBRACKET not
 *                        directly preceded by '@'; or is a backslash
 *                        followed by a GENERIC token
 * @throws CommaInDomain  when the current token is a comma
 * @throws ConsecutiveAt  when the current token is '@'
 * @throws DomainHyphened when a hyphen is directly followed by a dot
 */
protected function checkDomainPartExceptions($prev)
{
    $currentType = $this->lexer->token['type'];

    $rejectedTypes = array_fill_keys(
        array(
            EmailLexer::S_DQUOTE,
            EmailLexer::S_SEMICOLON,
            EmailLexer::S_GREATERTHAN,
            EmailLexer::S_LOWERTHAN,
        ),
        true
    );

    if (isset($rejectedTypes[$currentType])) {
        throw new ExpectingATEXT();
    }

    if (EmailLexer::S_COMMA === $currentType) {
        throw new CommaInDomain();
    }

    if (EmailLexer::S_AT === $currentType) {
        throw new ConsecutiveAt();
    }

    if (EmailLexer::S_OPENQBRACKET === $currentType && $prev['type'] !== EmailLexer::S_AT) {
        throw new ExpectingATEXT();
    }

    if (EmailLexer::S_HYPHEN === $currentType && $this->lexer->isNextToken(EmailLexer::S_DOT)) {
        throw new DomainHyphened();
    }

    if (EmailLexer::S_BACKSLASH === $currentType && $this->lexer->isNextToken(EmailLexer::GENERIC)) {
        throw new ExpectingATEXT();
    }
}
340
|
|
|
|
341
|
80 |
|
/**
 * Tells whether the current token opens a bracketed domain literal.
 *
 * When it does, the lexer is asked to find the closing bracket; a
 * \RuntimeException raised by that search is translated into
 * ExpectingDomainLiteralClose.
 *
 * @return bool True when the current token is S_OPENBRACKET and the search
 *              for S_CLOSEBRACKET did not throw; false otherwise.
 *
 * @throws ExpectingDomainLiteralClose when the closing-bracket search throws
 */
protected function hasBrackets()
{
    if ($this->lexer->token['type'] === EmailLexer::S_OPENBRACKET) {
        try {
            $this->lexer->find(EmailLexer::S_CLOSEBRACKET);
        } catch (\RuntimeException $notFound) {
            throw new ExpectingDomainLiteralClose();
        }

        return true;
    }

    return false;
}
355
|
|
|
|
356
|
78 |
|
/**
 * Records a LabelTooLong warning for an over-long label.
 *
 * The check only fires when the current token is a dot and the previous
 * token is GENERIC with a value longer than 63 bytes.
 *
 * @param array $prev Token preceding the current one ('type' and 'value'
 *                    entries are read).
 */
protected function checkLabelLength($prev)
{
    if ($this->lexer->token['type'] !== EmailLexer::S_DOT) {
        return;
    }

    if ($prev['type'] !== EmailLexer::GENERIC) {
        return;
    }

    if (strlen($prev['value']) > 63) {
        $this->warnings[LabelTooLong::CODE] = new LabelTooLong();
    }
}
365
|
|
|
|
366
|
3 |
|
/**
 * Skips over a comment at the start of the domain.
 *
 * After the isUnclosedComment() check (defined outside this view;
 * presumably it throws when the comment is never closed - confirm), the
 * lexer is advanced until the next token is a closing parenthesis, calling
 * warnEscaping() on each step. The lexer is then advanced once more, and a
 * dot as the following token is rejected.
 *
 * @throws ExpectingATEXT when a dot follows once the lexer has been
 *                        advanced to the comment's closing parenthesis
 */
protected function parseDomainComments()
{
    $this->isUnclosedComment();

    while (true) {
        if ($this->lexer->isNextToken(EmailLexer::S_CLOSEPARENTHESIS)) {
            break;
        }

        $this->warnEscaping();
        $this->lexer->moveNext();
    }

    $this->lexer->moveNext();

    if ($this->lexer->isNextToken(EmailLexer::S_DOT)) {
        throw new ExpectingATEXT();
    }
}
379
|
|
|
|
380
|
|
|
/**
 * Records a TLD warning when a DomainLiteral warning is already present.
 *
 * The lookup is guarded with empty() because the DomainLiteral entry only
 * exists after a domain literal without an IPv6 tag was parsed; reading
 * the missing key directly raises an undefined-index notice.
 */
protected function addTLDWarnings()
{
    if (!empty($this->warnings[DomainLiteral::CODE])) {
        $this->warnings[TLD::CODE] = new TLD();
    }
}
386
|
|
|
} |
387
|
|
|
|
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.