Total Complexity | 204 |
Total Lines | 1695 |
Duplicated Lines | 0 % |
Changes | 24 | ||
Bugs | 5 | Features | 3 |
Complex classes like Email often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Email, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
64 | class Email |
||
65 | { |
||
66 | /*:diagnostic constants start:*/ |
||
67 | |||
68 | // Categories |
||
69 | protected const VALID_CATEGORY = 1; |
||
70 | protected const DNSWARN = 7; |
||
71 | protected const RFC5321 = 15; |
||
72 | protected const CFWS = 31; |
||
73 | protected const DEPREC = 63; |
||
74 | protected const RFC5322 = 127; |
||
75 | protected const ERR = 255; |
||
76 | |||
77 | // Diagnoses |
||
78 | // Address is valid |
||
79 | protected const VALID = 0; |
||
80 | // Address is valid but a DNS check was not successful |
||
81 | protected const DNSWARN_NO_MX_RECORD = 5; |
||
82 | protected const DNSWARN_NO_RECORD = 6; |
||
83 | // Address is valid for SMTP but has unusual elements |
||
84 | protected const RFC5321_TLD = 9; |
||
85 | protected const RFC5321_TLDNUMERIC = 10; |
||
86 | protected const RFC5321_QUOTEDSTRING = 11; |
||
87 | protected const RFC5321_ADDRESSLITERAL = 12; |
||
88 | protected const RFC5321_IPV6DEPRECATED = 13; |
||
89 | // Address is valid within the message but cannot be used unmodified for the envelope |
||
90 | protected const CFWS_COMMENT = 17; |
||
91 | protected const CFWS_FWS = 18; |
||
92 | // Address contains deprecated elements but may still be valid in restricted contexts |
||
93 | protected const DEPREC_LOCALPART = 33; |
||
94 | protected const DEPREC_FWS = 34; |
||
95 | protected const DEPREC_QTEXT = 35; |
||
96 | protected const DEPREC_QP = 36; |
||
97 | protected const DEPREC_COMMENT = 37; |
||
98 | protected const DEPREC_CTEXT = 38; |
||
99 | protected const DEPREC_CFWS_NEAR_AT = 49; |
||
100 | // The address is only valid according to the broad definition of RFC 5322. |
||
101 | // It is otherwise invalid. |
||
102 | protected const RFC5322_DOMAIN = 65; |
||
103 | protected const RFC5322_TOOLONG = 66; |
||
104 | protected const RFC5322_LOCAL_TOOLONG = 67; |
||
105 | protected const RFC5322_DOMAIN_TOOLONG = 68; |
||
106 | protected const RFC5322_LABEL_TOOLONG = 69; |
||
107 | protected const RFC5322_DOMAINLITERAL = 70; |
||
108 | protected const RFC5322_DOMLIT_OBSDTEXT = 71; |
||
109 | protected const RFC5322_IPV6_GRPCOUNT = 72; |
||
110 | protected const RFC5322_IPV6_2X2XCOLON = 73; |
||
111 | protected const RFC5322_IPV6_BADCHAR = 74; |
||
112 | protected const RFC5322_IPV6_MAXGRPS = 75; |
||
113 | protected const RFC5322_IPV6_COLONSTRT = 76; |
||
114 | protected const RFC5322_IPV6_COLONEND = 77; |
||
115 | // Address is invalid for any purpose |
||
116 | protected const ERR_EXPECTING_DTEXT = 129; |
||
117 | protected const ERR_NOLOCALPART = 130; |
||
118 | protected const ERR_NODOMAIN = 131; |
||
119 | protected const ERR_CONSECUTIVEDOTS = 132; |
||
120 | protected const ERR_ATEXT_AFTER_CFWS = 133; |
||
121 | protected const ERR_ATEXT_AFTER_QS = 134; |
||
122 | protected const ERR_ATEXT_AFTER_DOMLIT = 135; |
||
123 | protected const ERR_EXPECTING_QPAIR = 136; |
||
124 | protected const ERR_EXPECTING_ATEXT = 137; |
||
125 | protected const ERR_EXPECTING_QTEXT = 138; |
||
126 | protected const ERR_EXPECTING_CTEXT = 139; |
||
127 | protected const ERR_BACKSLASHEND = 140; |
||
128 | protected const ERR_DOT_START = 141; |
||
129 | protected const ERR_DOT_END = 142; |
||
130 | protected const ERR_DOMAINHYPHENSTART = 143; |
||
131 | protected const ERR_DOMAINHYPHENEND = 144; |
||
132 | protected const ERR_UNCLOSEDQUOTEDSTR = 145; |
||
133 | protected const ERR_UNCLOSEDCOMMENT = 146; |
||
134 | protected const ERR_UNCLOSEDDOMLIT = 147; |
||
135 | protected const ERR_FWS_CRLF_X2 = 148; |
||
136 | protected const ERR_FWS_CRLF_END = 149; |
||
137 | protected const ERR_CR_NO_LF = 150; |
||
138 | /*:diagnostic constants end:*/ |
||
139 | |||
140 | // Function control: when not diagnosing, isEmail() treats a final status below this value as valid |
||
141 | protected const THRESHOLD = 16; |
||
142 | |||
143 | // Email parts |
||
144 | protected const COMPONENT_LOCALPART = 0; |
||
145 | protected const COMPONENT_DOMAIN = 1; |
||
146 | protected const COMPONENT_LITERAL = 2; |
||
147 | protected const CONTEXT_COMMENT = 3; |
||
148 | protected const CONTEXT_FWS = 4; |
||
149 | protected const CONTEXT_QUOTEDSTRING = 5; |
||
150 | protected const CONTEXT_QUOTEDPAIR = 6; |
||
151 | |||
152 | // Miscellaneous string constants |
||
153 | protected const STRING_AT = '@'; |
||
154 | protected const STRING_BACKSLASH = '\\'; |
||
155 | protected const STRING_DOT = '.'; |
||
156 | protected const STRING_DQUOTE = '"'; |
||
157 | protected const STRING_OPENPARENTHESIS = '('; |
||
158 | protected const STRING_CLOSEPARENTHESIS = ')'; |
||
159 | protected const STRING_OPENSQBRACKET = '['; |
||
160 | protected const STRING_CLOSESQBRACKET = ']'; |
||
161 | protected const STRING_HYPHEN = '-'; |
||
162 | protected const STRING_COLON = ':'; |
||
163 | protected const STRING_DOUBLECOLON = '::'; |
||
164 | protected const STRING_SP = ' '; |
||
165 | protected const STRING_HTAB = "\t"; |
||
166 | protected const STRING_CR = "\r"; |
||
167 | protected const STRING_LF = "\n"; |
||
168 | protected const STRING_IPV6TAG = 'IPv6:'; |
||
169 | |||
170 | // US-ASCII visible characters not valid for atext |
||
171 | // <http://tools.ietf.org/html/rfc5322#section-3.2.3> |
||
172 | protected const STRING_SPECIALS = '()<>[]:;@\\,."'; |
||
173 | |||
174 | /** |
||
175 | * |
||
176 | * The email address being checked. |
||
177 | * |
||
178 | * @var string |
||
179 | * |
||
180 | */ |
||
181 | protected $email; |
||
182 | |||
183 | /** |
||
184 | * |
||
185 | * Check DNS as part of validation? |
||
186 | * |
||
187 | * @var bool |
||
188 | * |
||
189 | */ |
||
190 | protected $checkDns; |
||
191 | |||
192 | /** |
||
193 | * |
||
194 | * The validation threshold level. |
||
195 | * |
||
196 | * @var int |
||
197 | * |
||
198 | */ |
||
199 | protected $threshold; |
||
200 | |||
201 | /** |
||
202 | * |
||
203 | * Diagnose errors? |
||
204 | * |
||
205 | * @var bool |
||
206 | * |
||
207 | */ |
||
208 | protected $diagnose; |
||
209 | |||
210 | /** |
||
211 | * |
||
212 | * Has DNS been checked? |
||
213 | * |
||
214 | * @var bool |
||
215 | * |
||
216 | */ |
||
217 | protected $dnsChecked; |
||
218 | |||
219 | /** |
||
220 | * |
||
221 | * The return status codes collected while parsing. |
||
222 | * |
||
223 | * @var array |
||
224 | * |
||
225 | */ |
||
226 | protected $returnStatus; |
||
227 | |||
228 | /** |
||
229 | * |
||
230 | * The length of the email address being checked. |
||
231 | * |
||
232 | * @var int |
||
233 | * |
||
234 | */ |
||
235 | protected $rawLength; |
||
236 | |||
237 | /** |
||
238 | * |
||
239 | * The current parser context. |
||
240 | * |
||
241 | * @var int |
||
242 | * |
||
243 | */ |
||
244 | protected $context; |
||
245 | |||
246 | /** |
||
247 | * |
||
248 | * Parser context stack. |
||
249 | * |
||
250 | * @var array |
||
251 | * |
||
252 | */ |
||
253 | protected $contextStack; |
||
254 | |||
255 | /** |
||
256 | * |
||
257 | * The prior parser context. |
||
258 | * |
||
259 | * @var int |
||
260 | * |
||
261 | */ |
||
262 | protected $contextPrior; |
||
263 | |||
264 | /** |
||
265 | * |
||
266 | * The current token being parsed. |
||
267 | * |
||
268 | * @var string |
||
269 | * |
||
270 | */ |
||
271 | protected $token; |
||
272 | |||
273 | /** |
||
274 | * |
||
275 | * The previous token being parsed. |
||
276 | * |
||
277 | * @var string |
||
278 | * |
||
279 | */ |
||
280 | protected $tokenPrior; |
||
281 | |||
282 | /** |
||
283 | * |
||
284 | * The components of the address. |
||
285 | * |
||
286 | * @var array |
||
287 | * |
||
288 | */ |
||
289 | protected $parseData; |
||
290 | |||
291 | /** |
||
292 | * |
||
293 | * The dot-atom elements of the address. |
||
294 | * |
||
295 | * @var array |
||
296 | * |
||
297 | */ |
||
298 | protected $atomList; |
||
299 | |||
300 | /** |
||
301 | * |
||
302 | * Element count. |
||
303 | * |
||
304 | * @var int |
||
305 | * |
||
306 | */ |
||
307 | protected $elementCount; |
||
308 | |||
309 | /** |
||
310 | * |
||
311 | * Element length. |
||
312 | * |
||
313 | * @var int |
||
314 | * |
||
315 | */ |
||
316 | protected $elementLen; |
||
317 | |||
318 | /** |
||
319 | * |
||
320 | * Is a hyphen allowed? |
||
321 | * |
||
322 | * @var bool |
||
323 | * |
||
324 | */ |
||
325 | protected $hyphenFlag; |
||
326 | |||
327 | /** |
||
328 | * |
||
329 | * CFWS can only appear at the end of the element |
||
330 | * |
||
331 | * @var bool |
||
332 | * |
||
333 | */ |
||
334 | protected $endOrDie; |
||
335 | |||
336 | /** |
||
337 | * |
||
338 | * Current position in the email string. |
||
339 | * |
||
340 | * @var int |
||
341 | * |
||
342 | */ |
||
343 | protected $pos; |
||
344 | |||
345 | /** |
||
346 | * |
||
347 | * Count of CRLF occurrences. |
||
348 | * |
||
349 | * @var null|int |
||
350 | * |
||
351 | */ |
||
352 | protected $crlfCount; |
||
353 | |||
354 | /** |
||
355 | * |
||
356 | * The final status of email validation. |
||
357 | * |
||
358 | * @var null|int |
||
359 | * |
||
360 | */ |
||
361 | protected $finalStatus; |
||
362 | |||
/**
 *
 * Validates that the value is an email address.
 *
 * The field value is read from the subject and must be representable as a
 * string: scalars and objects with __toString() are cast to string, while
 * anything else (null, arrays, other objects) is reported as invalid rather
 * than being passed to the string-typed helpers, which would raise a
 * TypeError.
 *
 * @param object $subject The subject to be filtered.
 *
 * @param string $field The subject field name.
 *
 * @return bool True if valid, false if not.
 *
 */
public function __invoke($subject, $field)
{
    $email = $subject->$field;

    // idnToAscii() and isEmail() both declare a string parameter; reject
    // values that cannot be turned into one instead of letting them fail.
    $stringable = is_scalar($email)
        || (is_object($email) && method_exists($email, '__toString'));
    if (! $stringable) {
        return false;
    }
    $email = (string) $email;

    // Only attempt the IDN conversion when the intl extension is loaded.
    if ($this->intl()) {
        $email = $this->idnToAscii($email);
    }

    return $this->isEmail($email);
}
||
382 | |||
/**
 *
 * Reports whether the intl extension is available.
 *
 * @return bool True when the intl extension is loaded, false otherwise.
 *
 */
protected function intl(): bool
{
    $isLoaded = extension_loaded('intl');

    return $isLoaded;
}
||
393 | |||
/**
 *
 * Converts an international domain in the email address to ASCII.
 *
 * The domain is everything after the last "@". Addresses without an "@",
 * or with nothing after the last "@", are returned unchanged. When the
 * conversion itself fails, the address is returned with an empty domain
 * part.
 *
 * @param string $email The email address to check.
 *
 * @return string The email with the IDN converted to ASCII (if possible).
 *
 */
protected function idnToAscii(string $email): string
{
    $parts = explode('@', $email);
    $domain = array_pop($parts);
    if (! $parts) {
        // no parts remaining, so no @ symbol, so not valid to begin with
        return $email;
    }

    if ($domain === '') {
        // Nothing to convert. idn_to_ascii() does not accept an empty
        // domain (it raises a ValueError on PHP 8), so skip the call; the
        // result is the same string the failed conversion used to produce.
        return $email;
    }

    $ascii = idn_to_ascii($domain, IDNA_DEFAULT, INTL_IDNA_VARIANT_UTS46);
    if ($ascii === false) {
        // Conversion failed: make the previous implicit false-to-string
        // behaviour explicit by leaving the domain part empty.
        $ascii = '';
    }

    // put the parts back together, with the domain part converted to ascii
    return implode('@', $parts) . '@' . $ascii;
}
||
415 | |||
/**
 *
 * Checks an email address against RFCs 5321, 5322 and others.
 *
 * The instance state is reset for the given address, then the address is
 * parsed, the DNS and TLD checks are run, and the final status is computed.
 *
 * @param string $email The email address to check.
 *
 * @param bool $checkDns Make a DNS check for MX records?
 *
 * @param mixed $errorlevel Determines the boundary between valid and
 * invalid addresses. Status codes above this number will be returned as-
 * is, status codes below will be returned as Email::VALID. Thus the
 * calling program can simply look for Email::VALID if it is only
 * interested in whether an address is valid or not. The errorlevel will
 * determine how "picky" isEmail() is about the address. If omitted or
 * passed as false then isEmail() will return true or false rather than
 * an integer error or warning. N.B.: Note the difference between
 * $errorlevel = false and $errorlevel = 0.
 *
 * @return bool|int The final status when diagnosing; otherwise whether the
 * final status is below Email::THRESHOLD.
 *
 */
protected function isEmail(string $email, bool $checkDns = false, $errorlevel = false)
{
    $this->reset($email, $checkDns, $errorlevel);

    // Run the validation stages in their fixed order.
    foreach (['parse', 'checkDns', 'checkTld', 'finalStatus'] as $stage) {
        $this->$stage();
    }

    if ($this->diagnose) {
        return $this->finalStatus;
    }

    return $this->finalStatus < Email::THRESHOLD;
}
||
448 | |||
/**
 *
 * Re-initialises the rule's state so a new email address can be checked.
 *
 * @param string $email The email address to check.
 *
 * @param bool $checkDns Make a DNS check for MX records?
 *
 * @param mixed $errorlevel Determines the boundary between valid and
 * invalid addresses.
 *
 * @return void
 *
 */
protected function reset(string $email, bool $checkDns, $errorlevel): void
{
    // The input and the options controlling the check
    $this->email = $email;
    $this->rawLength = strlen($email);
    $this->checkDns = $checkDns;
    $this->dnsChecked = false;
    $this->setThresholdDiagnose($errorlevel);

    // Collected results
    $this->returnStatus = [Email::VALID];
    $this->finalStatus = null;
    $this->crlfCount = null;

    // Parsing starts in the local part: that is where we are, where we
    // just came from, and the only entry in the history of where we have been
    $this->context = Email::COMPONENT_LOCALPART;
    $this->contextPrior = Email::COMPONENT_LOCALPART;
    $this->contextStack = [Email::COMPONENT_LOCALPART];

    // The current and the previous character
    $this->token = '';
    $this->tokenPrior = '';

    // The components of the address
    $this->parseData = [
        Email::COMPONENT_LOCALPART => '',
        Email::COMPONENT_DOMAIN => '',
    ];

    // The dot-atom elements of the address
    $this->atomList = [
        Email::COMPONENT_LOCALPART => [''],
        Email::COMPONENT_DOMAIN => [''],
    ];

    $this->elementCount = 0;
    $this->elementLen = 0;

    // Hyphen cannot occur at the end of a subdomain
    $this->hyphenFlag = false;

    // CFWS can only appear at the end of the element
    $this->endOrDie = false;
}
||
515 | |||
/**
 *
 * Derives the $threshold and $diagnose properties from the error level.
 *
 * A boolean error level sets the threshold to Email::VALID and uses the
 * boolean itself as the diagnose flag. Any other value turns diagnosis on
 * and is used as the threshold after an integer cast, except that
 * E_WARNING and E_ERROR are mapped to Email::THRESHOLD and Email::VALID
 * respectively for backward compatibility.
 *
 * @param mixed $errorlevel Determines the boundary between valid and
 * invalid addresses.
 *
 * @return void
 *
 */
protected function setThresholdDiagnose($errorlevel): void
{
    if (is_bool($errorlevel)) {
        $this->diagnose = $errorlevel;
        $this->threshold = Email::VALID;
        return;
    }

    $this->diagnose = true;

    // Legacy error-level values kept for backward compatibility
    $legacyLevels = [
        E_WARNING => Email::THRESHOLD,
        E_ERROR => Email::VALID,
    ];

    $level = (int) $errorlevel;
    $this->threshold = array_key_exists($level, $legacyLevels)
        ? $legacyLevels[$level]
        : (int) $errorlevel;
}
||
549 | |||
/**
 *
 * Walks the address one character at a time, handing each character to
 * the handler for the current context, then runs the final parse step.
 *
 * @return void
 *
 */
protected function parse(): void
{
    $this->pos = 0;

    while ($this->pos < $this->rawLength) {
        $this->token = $this->email[$this->pos];
        $this->parseContext();

        // A status above the RFC5322 category is fatal: stop right here,
        // leaving the position on the offending character
        $worstStatus = (int) max($this->returnStatus);
        if ($worstStatus > Email::RFC5322) {
            break;
        }

        $this->pos++;
    }

    $this->parseFinal();
}
||
569 | |||
/**
 *
 * Dispatches the current token to the parser for the current context.
 *
 * @return void
 *
 * @throws Exception When the current context is not a known one.
 *
 */
protected function parseContext(): void
{
    // Context => handler method, checked in this order
    $handlers = [
        Email::COMPONENT_LOCALPART => 'parseComponentLocalPart',
        Email::COMPONENT_DOMAIN => 'parseComponentDomain',
        Email::COMPONENT_LITERAL => 'parseComponentLiteral',
        Email::CONTEXT_QUOTEDSTRING => 'parseContextQuotedString',
        Email::CONTEXT_QUOTEDPAIR => 'parseContextQuotedPair',
        Email::CONTEXT_COMMENT => 'parseContextComment',
        Email::CONTEXT_FWS => 'parseContextFws',
    ];

    foreach ($handlers as $context => $handler) {
        // Loose comparison on purpose: it matches like a switch statement
        if ($this->context == $context) {
            $this->$handler();
            return;
        }
    }

    throw new Exception("Unknown context: {$this->context}");
}
||
605 | |||
/**
 *
 * Handles the current token while parsing the local part of the address.
 *
 * Grammar reference (http://tools.ietf.org/html/rfc5322#section-3.4.1):
 *
 *   local-part     = dot-atom / quoted-string / obs-local-part
 *   dot-atom       = [CFWS] dot-atom-text [CFWS]
 *   dot-atom-text  = 1*atext *("." 1*atext)
 *   quoted-string  = [CFWS] DQUOTE *([FWS] qcontent) [FWS] DQUOTE [CFWS]
 *   obs-local-part = word *("." word)
 *   word           = atom / quoted-string
 *   atom           = [CFWS] 1*atext [CFWS]
 *
 * @return void
 *
 * @throws Exception When the context stack or the prior context is in an
 * unexpected state.
 *
 */
protected function parseComponentLocalPart(): void
{
    $local = Email::COMPONENT_LOCALPART;

    // Snapshot of where we are within the dot-atom; every branch reads
    // these before it changes the element counters
    $atElementStart = ($this->elementLen === 0);
    $inFirstElement = ($this->elementCount === 0);

    switch ($this->token) {

        // "(" opens a comment
        case Email::STRING_OPENPARENTHESIS:
            if (! $atElementStart) {
                // A comment cannot start in the middle of an element, so
                // the element has to end here
                $this->returnStatus[] = Email::CFWS_COMMENT;
                $this->endOrDie = true;
            } elseif ($inFirstElement) {
                // Comments are OK at the beginning of an element
                $this->returnStatus[] = Email::CFWS_COMMENT;
            } else {
                $this->returnStatus[] = Email::DEPREC_COMMENT;
            }

            $this->contextStack[] = $this->context;
            $this->context = Email::CONTEXT_COMMENT;
            break;

        // "." moves on to the next dot-atom element
        case Email::STRING_DOT:
            if ($atElementStart) {
                // Fatal error: a dot with nothing in front of it
                $this->returnStatus[] = $inFirstElement
                    ? Email::ERR_DOT_START
                    : Email::ERR_CONSECUTIVEDOTS;
            } elseif ($this->endOrDie) {
                // The entire local-part can be a quoted string for RFC 5321;
                // if only one atom is quoted it is an RFC 5322 obsolete form
                $this->returnStatus[] = Email::DEPREC_LOCALPART;
            }

            // CFWS & quoted strings are OK again at the beginning of an
            // element (although they are obsolete forms)
            $this->endOrDie = false;
            $this->elementLen = 0;
            $this->elementCount++;
            $this->parseData[$local] .= $this->token;
            $this->atomList[$local][$this->elementCount] = '';
            break;

        // DQUOTE opens a quoted string
        case Email::STRING_DQUOTE:
            if (! $atElementStart) {
                // Fatal error
                $this->returnStatus[] = Email::ERR_EXPECTING_ATEXT;
                break;
            }

            // The entire local-part can be a quoted string for RFC 5321;
            // if only one atom is quoted it is an RFC 5322 obsolete form
            $this->returnStatus[] = $inFirstElement
                ? Email::RFC5321_QUOTEDSTRING
                : Email::DEPREC_LOCALPART;

            $this->parseData[$local] .= $this->token;
            $this->atomList[$local][$this->elementCount] .= $this->token;
            $this->elementLen++;

            // Quoted string must be the entire element
            $this->endOrDie = true;
            $this->contextStack[] = $this->context;
            $this->context = Email::CONTEXT_QUOTEDSTRING;
            break;

        // Folding white space
        case Email::STRING_CR:
        case Email::STRING_SP:
        case Email::STRING_HTAB:
            if ($this->token === Email::STRING_CR) {
                // A CR must be followed by an LF; step onto the next
                // character to check
                $this->pos++;
                $lfMissing = ($this->pos === $this->rawLength)
                    || ($this->email[$this->pos] !== Email::STRING_LF);
                if ($lfMissing) {
                    // Fatal error
                    $this->returnStatus[] = Email::ERR_CR_NO_LF;
                    break;
                }
            }

            if (! $atElementStart) {
                // FWS cannot start in the middle of an element, so the
                // element has to end here
                $this->endOrDie = true;
            } elseif ($inFirstElement) {
                $this->returnStatus[] = Email::CFWS_FWS;
            } else {
                $this->returnStatus[] = Email::DEPREC_FWS;
            }

            $this->contextStack[] = $this->context;
            $this->context = Email::CONTEXT_FWS;
            $this->tokenPrior = $this->token;
            break;

        // "@" ends the local part
        case Email::STRING_AT:
            // At this point we should have a valid local-part
            if (count((array) $this->contextStack) !== 1) {
                throw new Exception('Unexpected item on context stack');
            }

            $localPart = $this->parseData[$local];
            $afterCfws = ($this->contextPrior === Email::CONTEXT_COMMENT)
                || ($this->contextPrior === Email::CONTEXT_FWS);

            if ($localPart === '') {
                // Fatal error
                $this->returnStatus[] = Email::ERR_NOLOCALPART;
            } elseif ($atElementStart) {
                // Fatal error
                $this->returnStatus[] = Email::ERR_DOT_END;
            } elseif (strlen($localPart) > 64) {
                // http://tools.ietf.org/html/rfc5321#section-4.5.3.1.1
                // The maximum total length of a user name or other
                // local-part is 64 octets.
                $this->returnStatus[] = Email::RFC5322_LOCAL_TOOLONG;
            } elseif ($afterCfws) {
                // http://tools.ietf.org/html/rfc5322#section-3.4.1
                // Comments and folding white space SHOULD NOT be used
                // around the "@" in the addr-spec. Per RFC 2119, SHOULD NOT
                // means the behaviour may be acceptable in particular
                // circumstances, so this is a deprecation, not an error.
                $this->returnStatus[] = Email::DEPREC_CFWS_NEAR_AT;
            }

            // Clear everything down for the domain parsing
            $this->context = Email::COMPONENT_DOMAIN;
            $this->contextStack = [$this->context];
            $this->elementCount = 0;
            $this->elementLen = 0;
            // CFWS can only appear at the end of the element
            $this->endOrDie = false;
            break;

        // Anything else has to be atext
        default:
            // http://tools.ietf.org/html/rfc5322#section-3.2.3
            // atext = ALPHA / DIGIT / "!" / "#" / "$" / "%" / "&" / "'" /
            //         "*" / "+" / "-" / "/" / "=" / "?" / "^" / "_" /
            //         "`" / "{" / "|" / "}" / "~"
            // i.e. printable US-ASCII characters not including specials
            if ($this->endOrDie) {
                // We have encountered atext where it is no longer valid
                if (in_array($this->contextPrior, [Email::CONTEXT_COMMENT, Email::CONTEXT_FWS])) {
                    $this->returnStatus[] = Email::ERR_ATEXT_AFTER_CFWS;
                } elseif ($this->contextPrior == Email::CONTEXT_QUOTEDSTRING) {
                    $this->returnStatus[] = Email::ERR_ATEXT_AFTER_QS;
                } else {
                    throw new Exception("More atext found where none is allowed, but unrecognised prior context: {$this->contextPrior}");
                }
                break;
            }

            $this->contextPrior = $this->context;

            $ord = ord($this->token);
            $isPrintable = ($ord >= 33) && ($ord <= 126);
            if (! $isPrintable || strpos(Email::STRING_SPECIALS, $this->token) !== false) {
                // Fatal error
                $this->returnStatus[] = Email::ERR_EXPECTING_ATEXT;
            }

            // The token is recorded even when it was flagged as an error
            $this->parseData[$local] .= $this->token;
            $this->atomList[$local][$this->elementCount] .= $this->token;
            $this->elementLen++;
    }
}
||
804 | |||
805 | /** |
||
806 | * |
||
807 | * Parse for the domain component. |
||
808 | * |
||
809 | * @return null |
||
810 | * |
||
811 | */ |
||
812 | protected function parseComponentDomain(): void |
||
813 | { |
||
814 | // http://tools.ietf.org/html/rfc5322#section-3.4.1 |
||
815 | // domain = dot-atom / domain-literal / obs-domain |
||
816 | // |
||
817 | // dot-atom = [CFWS] dot-atom-text [CFWS] |
||
818 | // |
||
819 | // dot-atom-text = 1*atext *("." 1*atext) |
||
820 | // |
||
821 | // domain-literal = [CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS] |
||
822 | // |
||
823 | // dtext = %d33-90 / ; Printable US-ASCII |
||
824 | // %d94-126 / ; characters not including |
||
825 | // obs-dtext ; "[", "]", or "\" |
||
826 | // |
||
827 | // obs-domain = atom *("." atom) |
||
828 | // |
||
829 | // atom = [CFWS] 1*atext [CFWS] |
||
830 | |||
831 | // http://tools.ietf.org/html/rfc5321#section-4.1.2 |
||
832 | // Mailbox = Local-part "@" ( Domain / address-literal ) |
||
833 | // |
||
834 | // Domain = sub-domain *("." sub-domain) |
||
835 | // |
||
836 | // address-literal = "[" ( IPv4-address-literal / |
||
837 | // IPv6-address-literal / |
||
838 | // General-address-literal ) "]" |
||
839 | // ; See Section 4.1.3 |
||
840 | |||
841 | // http://tools.ietf.org/html/rfc5322#section-3.4.1 |
||
842 | // Note: A liberal syntax for the domain portion of addr-spec is |
||
843 | // given here. However, the domain portion contains addressing |
||
844 | // information specified by and used in other protocols (e.g., |
||
845 | // [RFC1034], [RFC1035], [RFC1123], [RFC5321]). It is therefore |
||
846 | // incumbent upon implementations to conform to the syntax of |
||
847 | // addresses for the context in which they are used. |
||
848 | // is_email() author's note: it's not clear how to interpret this in |
||
849 | // the context of a general email address validator. The conclusion I |
||
850 | // have reached is this: "addressing information" must comply with |
||
851 | // RFC 5321 (and in turn RFC 1035), anything that is "semantically |
||
852 | // invisible" must comply only with RFC 5322. |
||
853 | switch ($this->token) { |
||
854 | |||
855 | // Comment |
||
856 | case Email::STRING_OPENPARENTHESIS: |
||
857 | if ($this->elementLen === 0) { |
||
858 | // Comments at the start of the domain are deprecated in the text |
||
859 | // Comments at the start of a subdomain are obs-domain |
||
860 | // (http://tools.ietf.org/html/rfc5322#section-3.4.1) |
||
861 | $this->returnStatus[] = ($this->elementCount === 0) ? Email::DEPREC_CFWS_NEAR_AT : Email::DEPREC_COMMENT; |
||
862 | } else { |
||
863 | $this->returnStatus[] = Email::CFWS_COMMENT; |
||
864 | // We can't start a comment in the middle of an element, so this better be the end |
||
865 | $this->endOrDie = true; |
||
866 | } |
||
867 | |||
868 | $this->contextStack[] = $this->context; |
||
869 | $this->context = Email::CONTEXT_COMMENT; |
||
870 | break; |
||
871 | |||
872 | // Next dot-atom element |
||
873 | case Email::STRING_DOT: |
||
874 | if ($this->elementLen === 0) { |
||
875 | // Another dot, already? |
||
876 | // Fatal error |
||
877 | $this->returnStatus[] = ($this->elementCount === 0) ? Email::ERR_DOT_START : Email::ERR_CONSECUTIVEDOTS; |
||
878 | } elseif ($this->hyphenFlag) { |
||
879 | // Previous subdomain ended in a hyphen |
||
880 | $this->returnStatus[] = Email::ERR_DOMAINHYPHENEND; |
||
881 | } else { |
||
882 | // Fatal error |
||
883 | // |
||
884 | // Nowhere in RFC 5321 does it say explicitly that the |
||
885 | // domain part of a Mailbox must be a valid domain according |
||
886 | // to the DNS standards set out in RFC 1035, but this *is* |
||
887 | // implied in several places. For instance, wherever the idea |
||
888 | // of host routing is discussed the RFC says that the domain |
||
889 | // must be looked up in the DNS. This would be nonsense unless |
||
890 | // the domain was designed to be a valid DNS domain. Hence we |
||
891 | // must conclude that the RFC 1035 restriction on label length |
||
892 | // also applies to RFC 5321 domains. |
||
893 | // |
||
894 | // http://tools.ietf.org/html/rfc1035#section-2.3.4 |
||
895 | // labels 63 octets or less |
||
896 | if ($this->elementLen > 63) { |
||
897 | $this->returnStatus[] = Email::RFC5322_LABEL_TOOLONG; |
||
898 | } |
||
899 | } |
||
900 | |||
901 | // CFWS is OK again now we're at the beginning of an element (although it may be obsolete CFWS) |
||
902 | $this->endOrDie = false; |
||
903 | $this->elementLen = 0; |
||
904 | $this->elementCount++; |
||
905 | $this->atomList[Email::COMPONENT_DOMAIN][$this->elementCount] = ''; |
||
906 | $this->parseData[Email::COMPONENT_DOMAIN] .= $this->token; |
||
907 | |||
908 | break; |
||
909 | |||
910 | // Domain literal |
||
911 | case Email::STRING_OPENSQBRACKET: |
||
912 | if ($this->parseData[Email::COMPONENT_DOMAIN] === '') { |
||
913 | // Domain literal must be the only component |
||
914 | $this->endOrDie = true; |
||
915 | $this->elementLen++; |
||
916 | $this->contextStack[] = $this->context; |
||
917 | $this->context = Email::COMPONENT_LITERAL; |
||
918 | $this->parseData[Email::COMPONENT_DOMAIN] .= $this->token; |
||
919 | $this->atomList[Email::COMPONENT_DOMAIN][$this->elementCount] .= $this->token; |
||
920 | $this->parseData[Email::COMPONENT_LITERAL] = ''; |
||
921 | } else { |
||
922 | // Fatal error |
||
923 | $this->returnStatus[] = Email::ERR_EXPECTING_ATEXT; |
||
924 | } |
||
925 | |||
926 | break; |
||
927 | |||
928 | // Folding White Space |
||
929 | case Email::STRING_CR: |
||
930 | case Email::STRING_SP: |
||
931 | case Email::STRING_HTAB: |
||
932 | if (($this->token === Email::STRING_CR) && ((++$this->pos === $this->rawLength) || ($this->email[$this->pos] !== Email::STRING_LF))) { |
||
933 | // Fatal error |
||
934 | $this->returnStatus[] = Email::ERR_CR_NO_LF; |
||
935 | break; |
||
936 | } |
||
937 | |||
938 | if ($this->elementLen === 0) { |
||
939 | $this->returnStatus[] = ($this->elementCount === 0) ? Email::DEPREC_CFWS_NEAR_AT : Email::DEPREC_FWS; |
||
940 | } else { |
||
941 | $this->returnStatus[] = Email::CFWS_FWS; |
||
942 | // We can't start FWS in the middle of an element, so this better be the end |
||
943 | $this->endOrDie = true; |
||
944 | } |
||
945 | |||
946 | $this->contextStack[] = $this->context; |
||
947 | $this->context = Email::CONTEXT_FWS; |
||
948 | $this->tokenPrior = $this->token; |
||
949 | break; |
||
950 | |||
951 | // atext |
||
952 | default: |
||
953 | // RFC 5322 allows any atext... |
||
954 | // http://tools.ietf.org/html/rfc5322#section-3.2.3 |
||
955 | // atext = ALPHA / DIGIT / ; Printable US-ASCII |
||
956 | // "!" / "#" / ; characters not including |
||
957 | // "$" / "%" / ; specials. Used for atoms. |
||
958 | // "&" / "'" / |
||
959 | // "*" / "+" / |
||
960 | // "-" / "/" / |
||
961 | // " = " / "?" / |
||
962 | // "^" / "_" / |
||
963 | // "`" / "{" / |
||
964 | // "|" / "}" / |
||
965 | // "~" |
||
966 | |||
967 | // But RFC 5321 only allows letter-digit-hyphen to comply with DNS rules (RFCs 1034 & 1123) |
||
968 | // http://tools.ietf.org/html/rfc5321#section-4.1.2 |
||
969 | // sub-domain = Let-dig [Ldh-str] |
||
970 | // |
||
971 | // Let-dig = ALPHA / DIGIT |
||
972 | // |
||
973 | // Ldh-str = *( ALPHA / DIGIT / "-" ) Let-dig |
||
974 | // |
||
975 | if ($this->endOrDie) { |
||
976 | // We have encountered atext where it is no longer valid |
||
977 | switch ($this->contextPrior) { |
||
978 | case Email::CONTEXT_COMMENT: |
||
979 | case Email::CONTEXT_FWS: |
||
980 | $this->returnStatus[] = Email::ERR_ATEXT_AFTER_CFWS; |
||
981 | break; |
||
982 | case Email::COMPONENT_LITERAL: |
||
983 | $this->returnStatus[] = Email::ERR_ATEXT_AFTER_DOMLIT; |
||
984 | break; |
||
985 | default: |
||
986 | throw new Exception("More atext found where none is allowed, but unrecognised prior context: {$this->contextPrior}"); |
||
987 | } |
||
988 | } |
||
989 | |||
990 | $ord = ord($this->token); |
||
991 | |||
992 | // Assume this token isn't a hyphen unless we discover it is |
||
993 | $this->hyphenFlag = false; |
||
994 | |||
995 | if (($ord < 33) || ($ord > 126) || (!is_bool(strpos(Email::STRING_SPECIALS, $this->token)))) { |
||
996 | // Fatal error |
||
997 | $this->returnStatus[] = Email::ERR_EXPECTING_ATEXT; |
||
998 | } elseif ($this->token === Email::STRING_HYPHEN) { |
||
999 | if ($this->elementLen === 0) { |
||
1000 | // Hyphens can't be at the beginning of a subdomain |
||
1001 | // Fatal error |
||
1002 | $this->returnStatus[] = Email::ERR_DOMAINHYPHENSTART; |
||
1003 | } |
||
1004 | $this->hyphenFlag = true; |
||
1005 | } elseif (!(($ord > 47 && $ord < 58) || ($ord > 64 && $ord < 91) || ($ord > 96 && $ord < 123))) { |
||
1006 | // Not an RFC 5321 subdomain, but still OK by RFC 5322 |
||
1007 | $this->returnStatus[] = Email::RFC5322_DOMAIN; |
||
1008 | } |
||
1009 | |||
1010 | $this->parseData[Email::COMPONENT_DOMAIN] .= $this->token; |
||
1011 | $this->atomList[Email::COMPONENT_DOMAIN][$this->elementCount] .= $this->token; |
||
1012 | $this->elementLen++; |
||
1013 | } |
||
1014 | } |
||
1015 | |||
/**
 *
 * Parse for a literal component.
 *
 * Handles the current token ($this->token) while the parser is inside a
 * domain literal ("[...]"):
 *  - "]"  closes the literal: the text collected in
 *         parseData[COMPONENT_LITERAL] is classified (IPv4 address literal,
 *         IPv6 address literal, or generic RFC 5322 domain literal) by
 *         appending a diagnosis to returnStatus, then the previous context is
 *         popped from contextStack;
 *  - "\"  switches to the quoted-pair context;
 *  - CR / SP / HTAB switch to the folding-white-space context;
 *  - anything else is treated as dtext and appended to the literal and domain.
 *
 * @return void
 *
 */
protected function parseComponentLiteral(): void
{
    // http://tools.ietf.org/html/rfc5322#section-3.4.1
    //   domain-literal  =   [CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS]
    //
    //   dtext           =   %d33-90 /          ; Printable US-ASCII
    //                       %d94-126 /         ;  characters not including
    //                       obs-dtext          ;  "[", "]", or "\"
    //
    //   obs-dtext       =   obs-NO-WS-CTL / quoted-pair
    switch ($this->token) {

        // End of domain literal
        case Email::STRING_CLOSESQBRACKET:
            if ((int) max($this->returnStatus) < Email::DEPREC) {
                // Could be a valid RFC 5321 address literal, so let's check

                // http://tools.ietf.org/html/rfc5321#section-4.1.2
                //   address-literal  = "[" ( IPv4-address-literal /
                //                    IPv6-address-literal /
                //                    General-address-literal ) "]"
                //                    ; See Section 4.1.3
                //
                // http://tools.ietf.org/html/rfc5321#section-4.1.3
                //   IPv4-address-literal  = Snum 3("."  Snum)
                //
                //   IPv6-address-literal  = "IPv6:" IPv6-addr
                //
                //   General-address-literal  = Standardized-tag ":" 1*dcontent
                //
                //   Standardized-tag  = Ldh-str
                //                     ; Standardized-tag MUST be specified in a
                //                     ; Standards-Track RFC and registered with IANA
                //
                //   dcontent       = %d33-90 / ; Printable US-ASCII
                //                  %d94-126 ; excl. "[", "\", "]"
                //
                //   Snum           = 1*3DIGIT
                //                  ; representing a decimal integer
                //                  ; value in the range 0 through 255
                //
                //   IPv6-addr      = IPv6-full / IPv6-comp / IPv6v4-full / IPv6v4-comp
                //
                //   IPv6-hex       = 1*4HEXDIG
                //
                //   IPv6-full      = IPv6-hex 7(":" IPv6-hex)
                //
                //   IPv6-comp      = [IPv6-hex *5(":" IPv6-hex)] "::"
                //                  [IPv6-hex *5(":" IPv6-hex)]
                //                  ; The "::" represents at least 2 16-bit groups of
                //                  ; zeros.  No more than 6 groups in addition to the
                //                  ; "::" may be present.
                //
                //   IPv6v4-full    = IPv6-hex 5(":" IPv6-hex) ":" IPv4-address-literal
                //
                //   IPv6v4-comp    = [IPv6-hex *3(":" IPv6-hex)] "::"
                //                  [IPv6-hex *3(":" IPv6-hex) ":"]
                //                  IPv4-address-literal
                //                  ; The "::" represents at least 2 16-bit groups of
                //                  ; zeros.  No more than 4 groups in addition to the
                //                  ; "::" and IPv4-address-literal may be present.
                //
                // is_email() author's note: We can't use ip2long() to validate
                // IPv4 addresses because it accepts abbreviated addresses
                // (xxx.xxx.xxx), expanding the last group to complete the address.
                // filter_var() validates IPv6 address inconsistently (up to PHP 5.3.3
                // at least) -- see http://bugs.php.net/bug.php?id=53236 for example
                $max_groups = 8;
                $matchesIP = array();
                $index = false;
                $addressliteral = $this->parseData[Email::COMPONENT_LITERAL];

                // Extract IPv4 part from the end of the address-literal (if there is one)
                if (preg_match('/\\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$/', $addressliteral, $matchesIP) > 0) {
                    $index = strrpos($addressliteral, $matchesIP[0]);
                    if ($index !== 0) {
                        // Convert IPv4 part to IPv6 format for further testing:
                        // the dotted quad is replaced by two placeholder groups
                        $addressliteral = substr($addressliteral, 0, $index) . '0:0';
                    }
                }

                if ($index === 0) {
                    // Nothing there except a valid IPv4 address, so...
                    $this->returnStatus[] = Email::RFC5321_ADDRESSLITERAL;
                } elseif (strncasecmp($addressliteral, Email::STRING_IPV6TAG, 5) !== 0) {
                    // Literal does not begin with the IPv6 tag
                    $this->returnStatus[] = Email::RFC5322_DOMAINLITERAL;
                } else {
                    // Drop the 5-character tag compared above
                    $IPv6 = substr($addressliteral, 5);
                    // Revision 2.7: Daniel Marschall's new IPv6 testing strategy
                    $matchesIP = explode(Email::STRING_COLON, $IPv6);
                    $groupCount = count($matchesIP);
                    $index = strpos($IPv6, Email::STRING_DOUBLECOLON);

                    if ($index === false) {
                        // We need exactly the right number of groups
                        if ($groupCount !== $max_groups) {
                            $this->returnStatus[] = Email::RFC5322_IPV6_GRPCOUNT;
                        }
                    } else {
                        if ($index !== strrpos($IPv6, Email::STRING_DOUBLECOLON)) {
                            // First and last "::" are at different positions
                            $this->returnStatus[] = Email::RFC5322_IPV6_2X2XCOLON;
                        } else {
                            if ($index === 0 || $index === (strlen($IPv6) - 2)) {
                                // RFC 4291 allows :: at the start or end of an address with 7 other groups in addition
                                $max_groups++;
                            }

                            if ($groupCount > $max_groups) {
                                $this->returnStatus[] = Email::RFC5322_IPV6_MAXGRPS;
                            } elseif ($groupCount === $max_groups) {
                                // Eliding a single "::"
                                $this->returnStatus[] = Email::RFC5321_IPV6DEPRECATED;
                            }
                        }
                    }

                    // Groups that are not 0-4 hex digits. Evaluated once here
                    // (the pattern match has no side effects) instead of twice
                    // inside the condition below.
                    $badGroups = preg_grep('/^[0-9A-Fa-f]{0,4}$/', $matchesIP, PREG_GREP_INVERT);

                    // Revision 2.7: Daniel Marschall's new IPv6 testing strategy
                    if ((substr($IPv6, 0, 1) === Email::STRING_COLON) && (substr($IPv6, 1, 1) !== Email::STRING_COLON)) {
                        // Address starts with a single colon
                        $this->returnStatus[] = Email::RFC5322_IPV6_COLONSTRT;
                    } elseif ((substr($IPv6, -1) === Email::STRING_COLON) && (substr($IPv6, -2, 1) !== Email::STRING_COLON)) {
                        // Address ends with a single colon
                        $this->returnStatus[] = Email::RFC5322_IPV6_COLONEND;
                    } elseif (is_array($badGroups) && count($badGroups) !== 0) {
                        // Check for unmatched characters
                        $this->returnStatus[] = Email::RFC5322_IPV6_BADCHAR;
                    } else {
                        $this->returnStatus[] = Email::RFC5321_ADDRESSLITERAL;
                    }
                }
            } else {
                // A diagnosis of DEPREC or above is already recorded, so no
                // RFC 5321 address-literal check is attempted
                $this->returnStatus[] = Email::RFC5322_DOMAINLITERAL;
            }

            // Record the closing bracket and return to the enclosing context
            $this->parseData[Email::COMPONENT_DOMAIN] .= $this->token;
            $this->atomList[Email::COMPONENT_DOMAIN][$this->elementCount] .= $this->token;
            $this->elementLen++;
            $this->contextPrior = $this->context;
            $this->context = (int) array_pop($this->contextStack);
            break;

        case Email::STRING_BACKSLASH:
            $this->returnStatus[] = Email::RFC5322_DOMLIT_OBSDTEXT;
            $this->contextStack[] = $this->context;
            $this->context = Email::CONTEXT_QUOTEDPAIR;
            break;

        // Folding White Space
        case Email::STRING_CR:
        case Email::STRING_SP:
        case Email::STRING_HTAB:
            // For CR, step to the next character: it must exist and be LF
            if (($this->token === Email::STRING_CR) && ((++$this->pos === $this->rawLength) || ($this->email[$this->pos] !== Email::STRING_LF))) {
                // Fatal error
                $this->returnStatus[] = Email::ERR_CR_NO_LF;
                break;
            }

            $this->returnStatus[] = Email::CFWS_FWS;

            $this->contextStack[] = $this->context;
            $this->context = Email::CONTEXT_FWS;
            $this->tokenPrior = $this->token;
            break;

        // dtext
        default:
            // http://tools.ietf.org/html/rfc5322#section-3.4.1
            //   dtext          =   %d33-90 /          ; Printable US-ASCII
            //                      %d94-126 /         ;  characters not including
            //                      obs-dtext          ;  "[", "]", or "\"
            //
            //   obs-dtext      =   obs-NO-WS-CTL / quoted-pair
            //
            //   obs-NO-WS-CTL  =   %d1-8 /            ; US-ASCII control
            //                      %d11 /             ;  characters that do not
            //                      %d12 /             ;  include the carriage
            //                      %d14-31 /          ;  return, line feed, and
            //                      %d127              ;  white space characters
            $ord = ord($this->token);

            // CR, LF, SP & HTAB have already been parsed above
            if (($ord > 127) || ($ord === 0) || ($this->token === Email::STRING_OPENSQBRACKET)) {
                $this->returnStatus[] = Email::ERR_EXPECTING_DTEXT; // Fatal error
                break;
            } elseif (($ord < 33) || ($ord === 127)) {
                $this->returnStatus[] = Email::RFC5322_DOMLIT_OBSDTEXT;
            }

            $this->parseData[Email::COMPONENT_LITERAL] .= $this->token;
            $this->parseData[Email::COMPONENT_DOMAIN] .= $this->token;
            $this->atomList[Email::COMPONENT_DOMAIN][$this->elementCount] .= $this->token;
            $this->elementLen++;
    }
}
||
1217 | |||
/**
 *
 * Parse for a quoted-string context.
 *
 * Consumes the current token while inside a quoted string of the local part:
 * a backslash opens a quoted pair, HTAB / CRLF start folding white space
 * (recorded as a single space), a double quote closes the string and restores
 * the previous context, and any other character is stored as qtext.
 *
 * @return void
 *
 */
protected function parseContextQuotedString(): void
{
    // http://tools.ietf.org/html/rfc5322#section-3.2.4
    //   quoted-string  =   [CFWS]
    //                      DQUOTE *([FWS] qcontent) [FWS] DQUOTE
    //                      [CFWS]
    //
    //   qcontent       =   qtext / quoted-pair
    $current = $this->token;
    $local = Email::COMPONENT_LOCALPART;

    if ($current === Email::STRING_BACKSLASH) {
        // Quoted pair: remember where we were and switch context
        $this->contextStack[] = $this->context;
        $this->context = Email::CONTEXT_QUOTEDPAIR;
        return;
    }

    if (($current === Email::STRING_CR) || ($current === Email::STRING_HTAB)) {
        // Folding White Space. A plain space is an ordinary character inside
        // a quoted string; only HTAB or CRLF count as FWS here.
        if ($current === Email::STRING_CR) {
            // Advance to the character after CR; it has to exist and be LF
            $this->pos++;
            if (($this->pos === $this->rawLength) || ($this->email[$this->pos] !== Email::STRING_LF)) {
                // Fatal error
                $this->returnStatus[] = Email::ERR_CR_NO_LF;
                return;
            }
        }

        // http://tools.ietf.org/html/rfc5322#section-3.2.2
        //   Runs of FWS, comment, or CFWS that occur between lexical tokens in a
        //   structured header field are semantically interpreted as a single
        //   space character.
        //
        // http://tools.ietf.org/html/rfc5322#section-3.2.4
        //   the CRLF in any FWS/CFWS that appears within the quoted-string [is]
        //   semantically "invisible" and therefore not part of the quoted-string
        $this->parseData[$local] .= Email::STRING_SP;
        $this->atomList[$local][$this->elementCount] .= Email::STRING_SP;
        $this->elementLen++;

        $this->returnStatus[] = Email::CFWS_FWS;
        $this->contextStack[] = $this->context;
        $this->context = Email::CONTEXT_FWS;
        $this->tokenPrior = $current;
        return;
    }

    if ($current === Email::STRING_DQUOTE) {
        // End of quoted string: keep the quote and go back to the outer context
        $this->parseData[$local] .= $current;
        $this->atomList[$local][$this->elementCount] .= $current;
        $this->elementLen++;
        $this->contextPrior = $this->context;
        $this->context = (int) array_pop($this->contextStack);
        return;
    }

    // qtext
    // http://tools.ietf.org/html/rfc5322#section-3.2.4
    //   qtext          =   %d33 /             ; Printable US-ASCII
    //                      %d35-91 /          ;  characters not including
    //                      %d93-126 /         ;  "\" or the quote character
    //                      obs-qtext
    //
    //   obs-qtext      =   obs-NO-WS-CTL
    //
    //   obs-NO-WS-CTL  =   %d1-8 /            ; US-ASCII control
    //                      %d11 /             ;  characters that do not
    //                      %d12 /             ;  include the carriage
    //                      %d14-31 /          ;  return, line feed, and
    //                      %d127              ;  white space characters
    $code = ord($current);
    $isFatal = ($code > 127) || ($code === 0) || ($code === 10);

    if ($isFatal) {
        // Fatal error
        $this->returnStatus[] = Email::ERR_EXPECTING_QTEXT;
    } elseif (($code < 32) || ($code === 127)) {
        $this->returnStatus[] = Email::DEPREC_QTEXT;
    }

    // The character is stored in every case, including the fatal one
    $this->parseData[$local] .= $current;
    $this->atomList[$local][$this->elementCount] .= $current;
    $this->elementLen++;

    // http://tools.ietf.org/html/rfc5322#section-3.4.1
    //   If the string can be represented as a dot-atom (that is, it contains
    //   no characters other than atext characters or "." surrounded by atext
    //   characters), then the dot-atom form SHOULD be used and the quoted-
    //   string form SHOULD NOT be used.
    //
    // TODO
    //
}
||
1317 | |||
1318 | /** |
||
1319 | * |
||
1320 | * Parse for a quoted-pair context. |
||
1321 | * |
||
1322 | * @return null |
||
1323 | * |
||
1324 | */ |
||
1325 | protected function parseContextQuotedPair(): void |
||
1382 | } |
||
1383 | } |
||
1384 | |||
/**
 *
 * Parse for a comment context.
 *
 * Consumes the current token while inside a parenthesised comment: "(" nests
 * another comment, ")" returns to the previous context, a backslash opens a
 * quoted pair, CR / SP / HTAB start folding white space, and everything else
 * is validated as ctext.
 *
 * @return void
 *
 */
protected function parseContextComment(): void
{
    // http://tools.ietf.org/html/rfc5322#section-3.2.2
    //   comment   =   "(" *([FWS] ccontent) [FWS] ")"
    //
    //   ccontent  =   ctext / quoted-pair / comment
    $current = $this->token;

    // Nested comments are OK: push the current context and stay in "comment"
    if ($current === Email::STRING_OPENPARENTHESIS) {
        $this->contextStack[] = $this->context;
        $this->context = Email::CONTEXT_COMMENT;
        return;
    }

    // End of comment
    if ($current === Email::STRING_CLOSEPARENTHESIS) {
        $this->contextPrior = $this->context;
        $this->context = (int) array_pop($this->contextStack);

        // http://tools.ietf.org/html/rfc5322#section-3.2.2
        //   Runs of FWS, comment, or CFWS that occur between lexical tokens in a
        //   structured header field are semantically interpreted as a single
        //   space character.
        //
        // is_email() author's note: This *cannot* mean that we must add a
        // space to the address wherever CFWS appears. This would result in
        // any addr-spec that had CFWS outside a quoted string being invalid
        // for RFC 5321. Hence nothing is appended to the parsed data here.
        return;
    }

    // Quoted pair
    if ($current === Email::STRING_BACKSLASH) {
        $this->contextStack[] = $this->context;
        $this->context = Email::CONTEXT_QUOTEDPAIR;
        return;
    }

    // Folding White Space
    $isWhiteSpace = ($current === Email::STRING_CR)
        || ($current === Email::STRING_SP)
        || ($current === Email::STRING_HTAB);

    if ($isWhiteSpace) {
        if ($current === Email::STRING_CR) {
            // Advance to the character after CR; it has to exist and be LF
            $this->pos++;
            if (($this->pos === $this->rawLength) || ($this->email[$this->pos] !== Email::STRING_LF)) {
                // Fatal error
                $this->returnStatus[] = Email::ERR_CR_NO_LF;
                return;
            }
        }

        $this->returnStatus[] = Email::CFWS_FWS;

        $this->contextStack[] = $this->context;
        $this->context = Email::CONTEXT_FWS;
        $this->tokenPrior = $current;
        return;
    }

    // ctext
    // http://tools.ietf.org/html/rfc5322#section-3.2.3
    //   ctext          =   %d33-39 /          ; Printable US-ASCII
    //                      %d42-91 /          ;  characters not including
    //                      %d93-126 /         ;  "(", ")", or "\"
    //                      obs-ctext
    //
    //   obs-ctext      =   obs-NO-WS-CTL
    //
    //   obs-NO-WS-CTL  =   %d1-8 /            ; US-ASCII control
    //                      %d11 /             ;  characters that do not
    //                      %d12 /             ;  include the carriage
    //                      %d14-31 /          ;  return, line feed, and
    //                      %d127              ;  white space characters
    $code = ord($current);

    if (($code > 127) || ($code === 0) || ($code === 10)) {
        // Fatal error
        $this->returnStatus[] = Email::ERR_EXPECTING_CTEXT;
        return;
    }

    if (($code < 32) || ($code === 127)) {
        $this->returnStatus[] = Email::DEPREC_CTEXT;
    }
}
||
1478 | |||
/**
 *
 * Parse for a folding-white-space context.
 *
 * Consumes the current token while inside a run of folding white space.
 * White-space tokens keep the parser in this context; the first other token
 * ends the run, restores the previous context and rewinds the position by
 * one so that token is examined again there. The token is remembered in
 * tokenPrior on every path except the double-CRLF error.
 *
 * @return void
 *
 */
protected function parseContextFws(): void
{
    // http://tools.ietf.org/html/rfc5322#section-3.2.2
    //   FWS  =  ([*WSP CRLF] 1*WSP) /  obs-FWS
    //                                  ; Folding white space
    //
    // But note the erratum:
    // http://www.rfc-editor.org/errata_search.php?rfc=5322&eid=1908:
    //   In the obsolete syntax, any amount of folding white space MAY be
    //   inserted where the obs-FWS rule is allowed.  This creates the
    //   possibility of having two consecutive "folds" in a line, and
    //   therefore the possibility that a line which makes up a folded header
    //   field could be composed entirely of white space.
    //
    //   obs-FWS  =  1*([CRLF] WSP)
    $current = $this->token;
    $followsCr = ($this->tokenPrior === Email::STRING_CR);

    if ($followsCr) {
        if ($current === Email::STRING_CR) {
            // Fatal error: a CRLF directly after a CRLF
            $this->returnStatus[] = Email::ERR_FWS_CRLF_X2;
            return;
        }

        // Count the folds seen in this run
        if (!isset($this->crlfCount)) {
            $this->crlfCount = 1;
        } else {
            $this->crlfCount++;
            if ($this->crlfCount > 1) {
                // Multiple folds = obsolete FWS
                $this->returnStatus[] = Email::DEPREC_FWS;
            }
        }
    }

    if ($current === Email::STRING_CR) {
        // Advance to the character after CR; it has to exist and be LF
        $this->pos++;
        if (($this->pos === $this->rawLength) || ($this->email[$this->pos] !== Email::STRING_LF)) {
            // Fatal error
            $this->returnStatus[] = Email::ERR_CR_NO_LF;
        }
    } elseif (($current !== Email::STRING_SP) && ($current !== Email::STRING_HTAB)) {
        // Anything that is not white space (SP and HTAB simply stay in FWS)
        if ($followsCr) {
            // Fatal error: the run ended straight after a CRLF
            $this->returnStatus[] = Email::ERR_FWS_CRLF_END;
        } else {
            if (isset($this->crlfCount)) {
                unset($this->crlfCount);
            }

            // End of FWS
            $this->contextPrior = $this->context;
            $this->context = (int) array_pop($this->contextStack);

            // http://tools.ietf.org/html/rfc5322#section-3.2.2
            //   Runs of FWS, comment, or CFWS that occur between lexical tokens in a
            //   structured header field are semantically interpreted as a single
            //   space character.
            //
            // is_email() author's note: This *cannot* mean that we must add a
            // space to the address wherever CFWS appears. This would result in
            // any addr-spec that had CFWS outside a quoted string being invalid
            // for RFC 5321. Hence nothing is appended to the parsed data here.

            // Look at this token again in the parent context
            $this->pos--;
        }
    }

    $this->tokenPrior = $current;
}
||
1564 | |||
1565 | /** |
||
1566 | * |
||
1567 | * Final wrap-up parsing. |
||
1568 | * |
||
1569 | * @return null |
||
1570 | * |
||
1571 | */ |
||
1572 | protected function parseFinal(): void |
||
1628 | } |
||
1629 | } |
||
1630 | } |
||
1631 | |||
/**
 *
 * Make a DNS check on the MX record, if requested.
 *
 * Runs only when $this->checkDns is truthy, the worst diagnosis recorded so
 * far is below Email::DNSWARN, and dns_get_record() exists. Outcomes:
 *  - MX lookup fails (false)                 -> DNSWARN_NO_RECORD
 *  - MX lookup returns one or more records   -> dnsChecked is set to true
 *  - MX lookup returns no records            -> DNSWARN_NO_MX_RECORD, then an
 *    A/CNAME lookup is made; if that fails or is empty -> DNSWARN_NO_RECORD
 *
 * Side effect: for a single-element domain a trailing "." is appended to
 * parseData[COMPONENT_DOMAIN].
 *
 * @return void
 *
 */
protected function checkDns(): void
{
    // Check DNS?
    if (!$this->checkDns || ((int) max($this->returnStatus) >= Email::DNSWARN) || !function_exists('dns_get_record')) {
        return;
    }

    // http://tools.ietf.org/html/rfc5321#section-2.3.5
    //   Names that can
    //   be resolved to MX RRs or address (i.e., A or AAAA) RRs (as discussed
    //   in Section 5) are permitted, as are CNAME RRs whose targets can be
    //   resolved, in turn, to MX or address RRs.
    //
    // http://tools.ietf.org/html/rfc5321#section-5.1
    //   The lookup first attempts to locate an MX record associated with the
    //   name.  If a CNAME record is found, the resulting name is processed as
    //   if it were the initial name. ... If an empty list of MXs is returned,
    //   the address is treated as if it was associated with an implicit MX
    //   RR, with a preference of 0, pointing to that host.
    //
    // is_email() author's note: We will regard the existence of a CNAME to be
    // sufficient evidence of the domain's existence. For performance reasons
    // we will not repeat the DNS lookup for the CNAME's target, but we will
    // raise a warning because we didn't immediately find an MX record.
    if ($this->elementCount === 0) {
        // Checking TLD DNS seems to work only if you explicitly check from the root
        $this->parseData[Email::COMPONENT_DOMAIN] .= '.';
    }

    // Not using checkdnsrr because of a suspected bug in PHP 5.3 (http://bugs.php.net/bug.php?id=51844)
    $result = @dns_get_record($this->parseData[Email::COMPONENT_DOMAIN], DNS_MX);

    if (is_bool($result) && !(bool) $result) {
        // Domain can't be found in DNS
        $this->returnStatus[] = Email::DNSWARN_NO_RECORD;
        return;
    }

    $mxCount = (is_array($result) || $result instanceof Countable) ? count($result) : 0;

    if ($mxCount !== 0) {
        $this->dnsChecked = true;
        return;
    }

    // MX-record for domain can't be found
    $this->returnStatus[] = Email::DNSWARN_NO_MX_RECORD;
    $result = @dns_get_record($this->parseData[Email::COMPONENT_DOMAIN], DNS_A + DNS_CNAME);

    // A failed lookup (false) is reported the same way as an empty answer,
    // matching how a failed MX lookup is handled above; previously a failure
    // here was silently treated as if records had been found.
    if (!is_array($result) || count($result) === 0) {
        // No usable records for the domain can be found
        $this->returnStatus[] = Email::DNSWARN_NO_RECORD;
    }
}
||
1685 | |||
1686 | /** |
||
1687 | * |
||
1688 | * Check the top-level domain of the address. |
||
1689 | * |
||
1690 | * @return null |
||
1691 | * |
||
1692 | */ |
||
1693 | protected function checkTld(): void |
||
1734 | } |
||
1735 | } |
||
1736 | } |
||
1737 | |||
1738 | /** |
||
1739 | * |
||
1740 | * Sets the final status and return status. |
||
1741 | * |
||
1742 | * @return null |
||
1743 | * |
||
1744 | */ |
||
1745 | protected function finalStatus(): void |
||
1759 | } |
||
1760 | } |
||
1761 | } |
||
1762 |