Passed
Pull Request — 4.x (#151)
by Hari
09:45
created

Email::parse()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 11
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Importance

Changes 3
Bugs 0 Features 0
Metric Value
cc 3
eloc 6
nc 3
nop 0
dl 0
loc 11
rs 10
c 3
b 0
f 0
1
<?php
2
/**
3
 *
4
 * This file is part of Aura for PHP.
5
 *
6
 * @license http://opensource.org/licenses/mit-license.php MIT
7
 *
8
 */
9
namespace Aura\Filter\Rule\Validate;
10
11
use Countable;
12
use Aura\Filter\Exception;
13
14
/**
15
 *
16
 * Check that an email address conforms to RFCs 5321, 5322 and others.
17
 *
18
 * As of Version 3.0, we are now distinguishing clearly between a Mailbox
19
 * as defined by RFC 5321 and an addr-spec as defined by RFC 5322. Depending
20
 * on the context, either can be regarded as a valid email address. The
21
 * RFC 5321 Mailbox specification is more restrictive (comments, white space
22
 * and obsolete forms are not allowed).
23
 *
24
 * Read the following RFCs to understand the constraints:
25
 *
26
 * - <http://tools.ietf.org/html/rfc5321>
27
 * - <http://tools.ietf.org/html/rfc5322>
28
 * - <http://tools.ietf.org/html/rfc4291#section-2.2>
29
 * - <http://tools.ietf.org/html/rfc1123#section-2.1>
30
 * - <http://tools.ietf.org/html/rfc3696> (guidance only)
31
 *
32
 * Copyright © 2008-2011, Dominic Sayers
33
 * Test schema documentation Copyright © 2011, Daniel Marschall
34
 * All rights reserved.
35
 *
36
 * ---
37
 *
38
 * N.B.: Refactored by Paul M. Jones, from Dominic Sayers' is_email() function
39
 * to methods and properties. Errors and omissions should be presumed to be a
40
 * result of the refactoring, not of the original function.
41
 *
42
 * Further, this validation rule converts IDNs to ASCII, which is not required
43
 * per se by any of the email RFCs.
44
 *
45
 * ---
46
 *
47
 * @package Aura.Filter
48
 *
49
 * @author Dominic Sayers <[email protected]>
50
 *
51
 * @author Paul M. Jones <[email protected]>
52
 *
53
 * @copyright 2008-2011 Dominic Sayers
54
 *
55
 * @copyright 2015 Paul M. Jones
56
 *
57
 * @license http://www.opensource.org/licenses/bsd-license.php BSD License
58
 *
59
 * @link http://www.dominicsayers.com/isemail
60
 *
61
 * @version 3.04.1 - Changed my link to http://isemail.info throughout
62
 *
63
 */
64
class Email
65
{
66
    /*:diagnostic constants start:*/
67
68
    // Categories
69
    public const VALID_CATEGORY = 1;
70
    public const DNSWARN = 7;
71
    public const RFC5321 = 15;
72
    public const CFWS = 31;
73
    public const DEPREC = 63;
74
    public const RFC5322 = 127;
75
    public const ERR = 255;
76
77
    // Diagnoses
78
    // Address is valid
79
    public const VALID = 0;
80
    // Address is valid but a DNS check was not successful
81
    public const DNSWARN_NO_MX_RECORD = 5;
82
    public const DNSWARN_NO_RECORD = 6;
83
    // Address is valid for SMTP but has unusual elements
84
    public const RFC5321_TLD = 9;
85
    public const RFC5321_TLDNUMERIC = 10;
86
    public const RFC5321_QUOTEDSTRING = 11;
87
    public const RFC5321_ADDRESSLITERAL = 12;
88
    public const RFC5321_IPV6DEPRECATED = 13;
89
    // Address is valid within the message but cannot be used unmodified for the envelope
90
    public const CFWS_COMMENT = 17;
91
    public const CFWS_FWS = 18;
92
    // Address contains deprecated elements but may still be valid in restricted contexts
93
    public const DEPREC_LOCALPART = 33;
94
    public const DEPREC_FWS = 34;
95
    public const DEPREC_QTEXT = 35;
96
    public const DEPREC_QP = 36;
97
    public const DEPREC_COMMENT = 37;
98
    public const DEPREC_CTEXT = 38;
99
    public const DEPREC_CFWS_NEAR_AT = 49;
100
    // The address is only valid according to the broad definition of RFC 5322.
101
    // It is otherwise invalid.
102
    public const RFC5322_DOMAIN = 65;
103
    public const RFC5322_TOOLONG = 66;
104
    public const RFC5322_LOCAL_TOOLONG = 67;
105
    public const RFC5322_DOMAIN_TOOLONG = 68;
106
    public const RFC5322_LABEL_TOOLONG = 69;
107
    public const RFC5322_DOMAINLITERAL = 70;
108
    public const RFC5322_DOMLIT_OBSDTEXT = 71;
109
    public const RFC5322_IPV6_GRPCOUNT = 72;
110
    public const RFC5322_IPV6_2X2XCOLON = 73;
111
    public const RFC5322_IPV6_BADCHAR = 74;
112
    public const RFC5322_IPV6_MAXGRPS = 75;
113
    public const RFC5322_IPV6_COLONSTRT = 76;
114
    public const RFC5322_IPV6_COLONEND = 77;
115
    // Address is invalid for any purpose
116
    public const ERR_EXPECTING_DTEXT = 129;
117
    public const ERR_NOLOCALPART = 130;
118
    public const ERR_NODOMAIN = 131;
119
    public const ERR_CONSECUTIVEDOTS = 132;
120
    public const ERR_ATEXT_AFTER_CFWS = 133;
121
    public const ERR_ATEXT_AFTER_QS = 134;
122
    public const ERR_ATEXT_AFTER_DOMLIT = 135;
123
    public const ERR_EXPECTING_QPAIR = 136;
124
    public const ERR_EXPECTING_ATEXT = 137;
125
    public const ERR_EXPECTING_QTEXT = 138;
126
    public const ERR_EXPECTING_CTEXT = 139;
127
    public const ERR_BACKSLASHEND = 140;
128
    public const ERR_DOT_START = 141;
129
    public const ERR_DOT_END = 142;
130
    public const ERR_DOMAINHYPHENSTART = 143;
131
    public const ERR_DOMAINHYPHENEND = 144;
132
    public const ERR_UNCLOSEDQUOTEDSTR = 145;
133
    public const ERR_UNCLOSEDCOMMENT = 146;
134
    public const ERR_UNCLOSEDDOMLIT = 147;
135
    public const ERR_FWS_CRLF_X2 = 148;
136
    public const ERR_FWS_CRLF_END = 149;
137
    public const ERR_CR_NO_LF = 150;
138
    /*:diagnostic constants end:*/
139
140
    // function control
141
    public const THRESHOLD = 16;
142
143
    // Email parts
144
    public const COMPONENT_LOCALPART = 0;
145
    public const COMPONENT_DOMAIN = 1;
146
    public const COMPONENT_LITERAL = 2;
147
    public const CONTEXT_COMMENT = 3;
148
    public const CONTEXT_FWS = 4;
149
    public const CONTEXT_QUOTEDSTRING = 5;
150
    public const CONTEXT_QUOTEDPAIR = 6;
151
152
    // Miscellaneous string constants
153
    public const STRING_AT = '@';
154
    public const STRING_BACKSLASH = '\\';
155
    public const STRING_DOT = '.';
156
    public const STRING_DQUOTE = '"';
157
    public const STRING_OPENPARENTHESIS = '(';
158
    public const STRING_CLOSEPARENTHESIS = ')';
159
    public const STRING_OPENSQBRACKET = '[';
160
    public const STRING_CLOSESQBRACKET = ']';
161
    public const STRING_HYPHEN = '-';
162
    public const STRING_COLON = ':';
163
    public const STRING_DOUBLECOLON = '::';
164
    public const STRING_SP = ' ';
165
    public const STRING_HTAB = "\t";
166
    public const STRING_CR = "\r";
167
    public const STRING_LF = "\n";
168
    public const STRING_IPV6TAG = 'IPv6:';
169
170
    // US-ASCII visible characters not valid for atext
171
    // <http://tools.ietf.org/html/rfc5322#section-3.2.3>
172
    public const STRING_SPECIALS = '()<>[]:;@\\,."';
173
174
    /**
175
     *
176
     * The email address being checked.
177
     *
178
     * @var string
179
     *
180
     */
181
    protected $email;
182
183
    /**
184
     *
185
     * Check DNS as part of validation?
186
     *
187
     * @var bool
188
     *
189
     */
190
    protected $checkDns;
191
192
    /**
193
     *
194
     * The validation threshold level.
195
     *
196
     * @var int
197
     *
198
     */
199
    protected $threshold;
200
201
    /**
202
     *
203
     * Diagnose errors?
204
     *
205
     * @var bool
206
     *
207
     */
208
    protected $diagnose;
209
210
    /**
211
     *
212
     * Has DNS been checked?
213
     *
214
     * @var bool
215
     *
216
     */
217
    protected $dnsChecked;
218
219
    /**
220
     *
221
     * The return status.
222
     *
223
     * @var array
224
     *
225
     */
226
    protected $returnStatus;
227
228
    /**
229
     *
230
     * The length of the email address being checked.
231
     *
232
     * @var int
233
     *
234
     */
235
    protected $rawLength;
236
237
    /**
238
     *
239
     * The current parser context.
240
     *
241
     * @var int
242
     *
243
     */
244
    protected $context;
245
246
    /**
247
     *
248
     * Parser context stack.
249
     *
250
     * @var array
251
     *
252
     */
253
    protected $contextStack;
254
255
    /**
256
     *
257
     * The prior parser context.
258
     *
259
     * @var int
260
     *
261
     */
262
    protected $contextPrior;
263
264
    /**
265
     *
266
     * The current token being parsed.
267
     *
268
     * @var string
269
     *
270
     */
271
    protected $token;
272
273
    /**
274
     *
275
     * The previous token being parsed.
276
     *
277
     * @var string
278
     *
279
     */
280
    protected $tokenPrior;
281
282
    /**
283
     *
284
     * The components of the address.
285
     *
286
     * @var array
287
     *
288
     */
289
    protected $parseData;
290
291
    /**
292
     *
293
     * The dot-atom elements of the address.
294
     *
295
     * @var array
296
     *
297
     */
298
    protected $atomList;
299
300
    /**
301
     *
302
     * Element count.
303
     *
304
     * @var int
305
     *
306
     */
307
    protected $elementCount;
308
309
    /**
310
     *
311
     * Element length.
312
     *
313
     * @var int
314
     *
315
     */
316
    protected $elementLen;
317
318
    /**
319
     *
320
     * Is a hyphen allowed?
321
     *
322
     * @var bool
323
     *
324
     */
325
    protected $hyphenFlag;
326
327
    /**
328
     *
329
     * CFWS can only appear at the end of the element
330
     *
331
     * @var bool
332
     *
333
     */
334
    protected $endOrDie;
335
336
    /**
337
     *
338
     * Current position in the email string.
339
     *
340
     * @var int
341
     *
342
     */
343
    protected $pos;
344
345
    /**
346
     *
347
     * Count of CRLF occurrences.
348
     *
349
     * @var null|int
350
     *
351
     */
352
    protected $crlfCount;
353
354
    /**
355
     *
356
     * The final status of email validation.
357
     *
358
     * @var null|int
359
     *
360
     */
361
    protected $finalStatus;
362
363
    /**
     *
     * Validates that the value is an email address.
     *
     * The field value is read from the subject, converted to ASCII (IDN) when
     * the intl extension is loaded, and then checked with isEmail() using its
     * default arguments.
     *
     * Values that cannot be represented as a string (null, arrays, objects
     * without __toString(), ...) are never valid email addresses and are
     * rejected up front instead of triggering a TypeError further down.
     *
     * @param object $subject The subject to be filtered.
     *
     * @param string $field The subject field name.
     *
     * @return bool True if valid, false if not.
     *
     */
    public function __invoke($subject, $field)
    {
        $email = $subject->$field;

        $stringable = is_scalar($email)
            || (is_object($email) && method_exists($email, '__toString'));
        if (! $stringable) {
            return false;
        }
        $email = (string) $email;

        if ($this->intl()) {
            $email = $this->idnToAscii($email);
        }

        // isEmail() can return an integer diagnosis when diagnostics are
        // requested; compare strictly so the documented boolean contract
        // always holds here.
        return $this->isEmail($email) === true;
    }
382
383
    /**
     *
     * Is the intl extension loaded?
     *
     * Kept as a separate method so the IDN conversion step in __invoke() can
     * be skipped when the extension is unavailable.
     *
     * @return bool True when the intl extension is loaded.
     *
     */
    protected function intl(): bool
    {
        return extension_loaded('intl');
    }
393
394
    /**
     *
     * Converts an international domain in the email address to ASCII.
     *
     * Only the text after the last "@" is converted; everything before it is
     * put back unchanged.
     *
     * @param string $email The email address to check.
     *
     * @return string The email with the IDN converted to ASCII (if possible).
     * An address without an "@", or with nothing after the last "@", is
     * returned unchanged. When the conversion fails, the domain is dropped
     * (leaving "local-part@").
     *
     */
    protected function idnToAscii(string $email): string
    {
        $parts = explode('@', $email);
        $domain = array_pop($parts);
        if (! $parts) {
            // no parts remaining, so no @ symbol, so not valid to begin with
            return $email;
        }

        if ($domain === '') {
            // Nothing to convert. Do not hand an empty string to
            // idn_to_ascii(): newer PHP versions throw a ValueError for it,
            // which would turn an invalid address into an uncaught error.
            return $email;
        }

        $ascii = idn_to_ascii($domain, IDNA_DEFAULT, INTL_IDNA_VARIANT_UTS46);
        if ($ascii === false) {
            // Conversion failed: keep the historical result of an empty
            // domain, but make it explicit rather than relying on the
            // implicit false-to-string conversion.
            $ascii = '';
        }

        // put the parts back together, with the domain part converted to ascii
        return implode('@', $parts) . '@' . $ascii;
    }
415
416
    /**
     *
     * Checks that an email address conforms to RFCs 5321, 5322 and others,
     * allowing for international domain names when the intl extension is
     * loaded.
     *
     * The check runs in a fixed order: reset state, parse, DNS check, TLD
     * check, then compute the final status.
     *
     * @param string $email The email address to check.
     *
     * @param bool $checkDns Make a DNS check for MX records?
     *
     * @param mixed $errorlevel Determines the boundary between valid and
     * invalid addresses. Status codes above this number will be returned as-
     * is, status codes below will be returned as Email::VALID. Thus the
     * calling program can simply look for Email::VALID if it is only
     * interested in whether an address is valid or not. The errorlevel will
     * determine how "picky" isEmail() is about the address. If omitted or
     * passed as false then isEmail() will return true or false rather than
     * an integer error or warning. N.B.: Note the difference between
     * $errorlevel = false and $errorlevel = 0.
     *
     * @return bool|int The final status when diagnosing; otherwise a boolean
     * telling whether the final status is below Email::THRESHOLD.
     *
     */
    protected function isEmail(string $email, bool $checkDns = false, $errorlevel = false)
    {
        $this->reset($email, $checkDns, $errorlevel);
        $this->parse();
        $this->checkDns();
        $this->checkTld();
        $this->finalStatus();

        if ($this->diagnose) {
            return $this->finalStatus;
        }

        return $this->finalStatus < Email::THRESHOLD;
    }
448
449
    /**
     *
     * Resets the validation rule for a new email address.
     *
     * Every piece of parser state is re-initialised so that one rule instance
     * can be reused for any number of addresses.
     *
     * @param string $email The email address to check.
     *
     * @param bool $checkDns Make a DNS check for MX records?
     *
     * @param mixed $errorlevel Determines the boundary between valid and
     * invalid addresses.
     *
     * @return void
     *
     */
    protected function reset(string $email, bool $checkDns, $errorlevel): void
    {
        $this->email = $email;

        $this->checkDns = $checkDns;
        $this->dnsChecked = false;

        $this->setThresholdDiagnose($errorlevel);

        // A list of every diagnosis collected so far; parse() looks at the
        // highest one to decide whether to stop early.
        $this->returnStatus = [Email::VALID];
        $this->rawLength = strlen($this->email);

        // Where we are
        $this->context = Email::COMPONENT_LOCALPART;

        // Where we have been
        $this->contextStack = [$this->context];

        // Where we just came from
        $this->contextPrior = Email::COMPONENT_LOCALPART;

        // The current character
        $this->token = '';

        // The previous character
        $this->tokenPrior = '';

        // For the components of the address
        $this->parseData = [
            Email::COMPONENT_LOCALPART => '',
            Email::COMPONENT_DOMAIN => '',
        ];

        // For the dot-atom elements of the address
        $this->atomList = [
            Email::COMPONENT_LOCALPART => [''],
            Email::COMPONENT_DOMAIN => [''],
        ];

        $this->elementCount = 0;
        $this->elementLen = 0;

        // Hyphen cannot occur at the end of a subdomain
        $this->hyphenFlag = false;

        // CFWS can only appear at the end of the element
        $this->endOrDie = false;

        $this->finalStatus = null;

        $this->crlfCount = null;
    }
515
516
    /**
     *
     * Sets the $threshold and $diagnose properties.
     *
     * A boolean error level sets the threshold to Email::VALID and uses the
     * boolean itself as the diagnose flag. Any other value turns diagnosis
     * on and is cast to an integer threshold, except that E_WARNING and
     * E_ERROR are mapped to Email::THRESHOLD and Email::VALID respectively
     * for backward compatibility.
     *
     * @param mixed $errorlevel Determines the boundary between valid and
     * invalid addresses.
     *
     * @return void
     *
     */
    protected function setThresholdDiagnose($errorlevel): void
    {
        if (is_bool($errorlevel)) {
            $this->threshold = Email::VALID;
            $this->diagnose = $errorlevel;
            return;
        }

        $this->diagnose = true;

        $level = (int) $errorlevel;
        if ($level === E_WARNING) {
            // For backward compatibility
            $this->threshold = Email::THRESHOLD;
        } elseif ($level === E_ERROR) {
            // For backward compatibility
            $this->threshold = Email::VALID;
        } else {
            $this->threshold = $level;
        }
    }
549
550
    /**
     *
     * Parse the address into components, character by character.
     *
     * Each character becomes the current token and is handed to
     * parseContext(). Parsing stops early as soon as a diagnosis above
     * Email::RFC5322 has been recorded; parseFinal() always runs afterwards.
     *
     * Note that the context parsers may advance $pos themselves (for example
     * to consume the LF of a CRLF pair).
     *
     * @return void
     *
     */
    protected function parse(): void
    {
        for ($this->pos = 0; $this->pos < $this->rawLength; $this->pos++) {
            $this->token = $this->email[$this->pos];
            $this->parseContext();

            $worst = (int) max($this->returnStatus);
            if ($worst > Email::RFC5322) {
                // No point going on if we've got a fatal error
                break;
            }
        }

        $this->parseFinal();
    }
569
570
    /**
     *
     * Parse for the current context.
     *
     * Dispatches the current token to the parser method that matches
     * $context.
     *
     * @return void
     *
     * @throws Exception When $context is not one of the known component or
     * context constants.
     *
     */
    protected function parseContext(): void
    {
        switch ($this->context) {
            case Email::COMPONENT_LOCALPART:
                $this->parseComponentLocalPart();
                return;
            case Email::COMPONENT_DOMAIN:
                $this->parseComponentDomain();
                return;
            case Email::COMPONENT_LITERAL:
                $this->parseComponentLiteral();
                return;
            case Email::CONTEXT_QUOTEDSTRING:
                $this->parseContextQuotedString();
                return;
            case Email::CONTEXT_QUOTEDPAIR:
                $this->parseContextQuotedPair();
                return;
            case Email::CONTEXT_COMMENT:
                $this->parseContextComment();
                return;
            case Email::CONTEXT_FWS:
                $this->parseContextFws();
                return;
        }

        throw new Exception("Unknown context: {$this->context}");
    }
605
606
    /**
     *
     * Parse for the local part component.
     *
     * Handles the current token while the parser is in the local-part
     * context: it may open a comment, quoted string or folding white space
     * context, start the next dot-atom element, switch to the domain on "@",
     * or append an atext character to the local part. Diagnoses are appended
     * to $returnStatus.
     *
     * @return void
     *
     * @throws Exception When the context stack is not back to a single entry
     * at the "@", or when atext follows a finished element and the prior
     * context is not one this method knows how to diagnose.
     *
     */
    protected function parseComponentLocalPart(): void
    {
        // http://tools.ietf.org/html/rfc5322#section-3.4.1
        //   local-part = dot-atom / quoted-string / obs-local-part
        //
        //   dot-atom = [CFWS] dot-atom-text [CFWS]
        //
        //   dot-atom-text = 1*atext *("." 1*atext)
        //
        //   quoted-string = [CFWS]
        //                       DQUOTE *([FWS] qcontent) [FWS] DQUOTE
        //                       [CFWS]
        //
        //   obs-local-part = word *("." word)
        //
        //   word = atom / quoted-string
        //
        //   atom = [CFWS] 1*atext [CFWS]
        switch ($this->token) {

            // Comment
            case Email::STRING_OPENPARENTHESIS:
                if ($this->elementLen === 0) {
                    // Comments are OK at the beginning of an element
                    $this->returnStatus[] = ($this->elementCount === 0)
                        ? Email::CFWS_COMMENT
                        : Email::DEPREC_COMMENT;
                } else {
                    // We can't start a comment in the middle of an element, so this better be the end
                    $this->returnStatus[] = Email::CFWS_COMMENT;
                    $this->endOrDie = true;
                }

                $this->contextStack[] = $this->context;
                $this->context = Email::CONTEXT_COMMENT;
                break;

            // Next dot-atom element
            case Email::STRING_DOT:
                if ($this->elementLen === 0) {
                    // Another dot, already?
                    // Fatal error
                    $this->returnStatus[] = ($this->elementCount === 0)
                        ? Email::ERR_DOT_START
                        : Email::ERR_CONSECUTIVEDOTS;
                } elseif ($this->endOrDie) {
                    // The entire local-part can be a quoted string for RFC 5321
                    // If it's just one atom that is quoted then it's an RFC 5322 obsolete form
                    $this->returnStatus[] = Email::DEPREC_LOCALPART;
                }

                // CFWS & quoted strings are OK again now we're at the beginning of an element (although they are obsolete forms)
                $this->endOrDie = false;
                $this->elementLen = 0;
                $this->elementCount++;
                $this->parseData[Email::COMPONENT_LOCALPART] .= $this->token;
                $this->atomList[Email::COMPONENT_LOCALPART][$this->elementCount] = '';
                break;

            // Quoted string
            case Email::STRING_DQUOTE:
                if ($this->elementLen !== 0) {
                    // A quote in the middle of an element
                    // Fatal error
                    $this->returnStatus[] = Email::ERR_EXPECTING_ATEXT;
                    break;
                }

                // The entire local-part can be a quoted string for RFC 5321
                // If it's just one atom that is quoted then it's an RFC 5322 obsolete form
                $this->returnStatus[] = ($this->elementCount === 0)
                    ? Email::RFC5321_QUOTEDSTRING
                    : Email::DEPREC_LOCALPART;

                $this->parseData[Email::COMPONENT_LOCALPART] .= $this->token;
                $this->atomList[Email::COMPONENT_LOCALPART][$this->elementCount] .= $this->token;
                $this->elementLen++;
                // Quoted string must be the entire element
                $this->endOrDie = true;
                $this->contextStack[] = $this->context;
                $this->context = Email::CONTEXT_QUOTEDSTRING;
                break;

            // Folding White Space
            case Email::STRING_CR:
            case Email::STRING_SP:
            case Email::STRING_HTAB:
                if ($this->token === Email::STRING_CR) {
                    // A CR must be followed by LF; step onto the next
                    // character to check (the position stays advanced).
                    $this->pos++;
                    $atEnd = ($this->pos === $this->rawLength);
                    if ($atEnd || $this->email[$this->pos] !== Email::STRING_LF) {
                        // Fatal error
                        $this->returnStatus[] = Email::ERR_CR_NO_LF;
                        break;
                    }
                }

                if ($this->elementLen === 0) {
                    $this->returnStatus[] = ($this->elementCount === 0)
                        ? Email::CFWS_FWS
                        : Email::DEPREC_FWS;
                } else {
                    // We can't start FWS in the middle of an element, so this better be the end
                    $this->endOrDie = true;
                }

                $this->contextStack[] = $this->context;
                $this->context = Email::CONTEXT_FWS;
                $this->tokenPrior = $this->token;
                break;

            // @
            case Email::STRING_AT:
                // At this point we should have a valid local-part
                if (count($this->contextStack) !== 1) {
                    throw new Exception('Unexpected item on context stack');
                }

                $localPart = $this->parseData[Email::COMPONENT_LOCALPART];
                $afterCfws = ($this->contextPrior === Email::CONTEXT_COMMENT)
                    || ($this->contextPrior === Email::CONTEXT_FWS);

                if ($localPart === '') {
                    // Fatal error
                    $this->returnStatus[] = Email::ERR_NOLOCALPART;
                } elseif ($this->elementLen === 0) {
                    // Fatal error
                    $this->returnStatus[] = Email::ERR_DOT_END;
                } elseif (strlen($localPart) > 64) {
                    // http://tools.ietf.org/html/rfc5321#section-4.5.3.1.1
                    //   The maximum total length of a user name or other local-part is 64
                    //   octets.
                    $this->returnStatus[] = Email::RFC5322_LOCAL_TOOLONG;
                } elseif ($afterCfws) {
                    // http://tools.ietf.org/html/rfc5322#section-3.4.1
                    //   Comments and folding white space
                    //   SHOULD NOT be used around the "@" in the addr-spec.
                    //
                    // http://tools.ietf.org/html/rfc2119
                    // 4. SHOULD NOT   This phrase, or the phrase "NOT RECOMMENDED" mean that
                    //    there may exist valid reasons in particular circumstances when the
                    //    particular behavior is acceptable or even useful, but the full
                    //    implications should be understood and the case carefully weighed
                    //    before implementing any behavior described with this label.
                    $this->returnStatus[] = Email::DEPREC_CFWS_NEAR_AT;
                }

                // Clear everything down for the domain parsing
                $this->context = Email::COMPONENT_DOMAIN; // Where we are
                $this->contextStack = [$this->context]; // Where we have been
                $this->elementCount = 0;
                $this->elementLen = 0;
                $this->endOrDie = false; // CFWS can only appear at the end of the element
                break;

            // atext
            default:
                // http://tools.ietf.org/html/rfc5322#section-3.2.3
                //    atext = ALPHA / DIGIT /    ; Printable US-ASCII
                //                        "!" / "#" /        ;  characters not including
                //                        "$" / "%" /        ;  specials.  Used for atoms.
                //                        "&" / "'" /
                //                        "*" / "+" /
                //                        "-" / "/" /
                //                        "=" / "?" /
                //                        "^" / "_" /
                //                        "`" / "{" /
                //                        "|" / "}" /
                //                        "~"
                if ($this->endOrDie) {
                    // We have encountered atext where it is no longer valid
                    switch ($this->contextPrior) {
                        case Email::CONTEXT_COMMENT:
                        case Email::CONTEXT_FWS:
                            $this->returnStatus[] = Email::ERR_ATEXT_AFTER_CFWS;
                            break;
                        case Email::CONTEXT_QUOTEDSTRING:
                            $this->returnStatus[] = Email::ERR_ATEXT_AFTER_QS;
                            break;
                        default:
                            throw new Exception("More atext found where none is allowed, but unrecognised prior context: {$this->contextPrior}");
                    }
                    break;
                }

                $this->contextPrior = $this->context;
                $ord = ord($this->token);

                // Outside printable US-ASCII (33-126), or one of the
                // "specials" that may not appear in atext. strpos() yields
                // an integer offset (possibly 0) when found, false otherwise.
                $isSpecial = strpos(Email::STRING_SPECIALS, $this->token) !== false;
                if (($ord < 33) || ($ord > 126) || $isSpecial) {
                    // Fatal error
                    $this->returnStatus[] = Email::ERR_EXPECTING_ATEXT;
                }

                // The token is recorded even when it was diagnosed as invalid
                $this->parseData[Email::COMPONENT_LOCALPART] .= $this->token;
                $this->atomList[Email::COMPONENT_LOCALPART][$this->elementCount] .= $this->token;
                $this->elementLen++;
        }
    }
804
805
    /**
806
     *
807
     * Parse for the domain component.
808
     *
809
     * @return null
810
     *
811
     */
812
    protected function parseComponentDomain(): void
813
    {
814
        // http://tools.ietf.org/html/rfc5322#section-3.4.1
815
        //   domain = dot-atom / domain-literal / obs-domain
816
        //
817
        //   dot-atom = [CFWS] dot-atom-text [CFWS]
818
        //
819
        //   dot-atom-text = 1*atext *("." 1*atext)
820
        //
821
        //   domain-literal = [CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS]
822
        //
823
        //   dtext = %d33-90 /          ; Printable US-ASCII
824
        //                       %d94-126 /         ;  characters not including
825
        //                       obs-dtext          ;  "[", "]", or "\"
826
        //
827
        //   obs-domain = atom *("." atom)
828
        //
829
        //   atom = [CFWS] 1*atext [CFWS]
830
831
        // http://tools.ietf.org/html/rfc5321#section-4.1.2
832
        //   Mailbox = Local-part "@" ( Domain / address-literal )
833
        //
834
        //   Domain = sub-domain *("." sub-domain)
835
        //
836
        //   address-literal = "[" ( IPv4-address-literal /
837
        //                    IPv6-address-literal /
838
        //                    General-address-literal ) "]"
839
        //                    ; See Section 4.1.3
840
841
        // http://tools.ietf.org/html/rfc5322#section-3.4.1
842
        //      Note: A liberal syntax for the domain portion of addr-spec is
843
        //      given here.  However, the domain portion contains addressing
844
        //      information specified by and used in other protocols (e.g.,
845
        //      [RFC1034], [RFC1035], [RFC1123], [RFC5321]).  It is therefore
846
        //      incumbent upon implementations to conform to the syntax of
847
        //      addresses for the context in which they are used.
848
        // is_email() author's note: it's not clear how to interpret this in
849
        // the context of a general email address validator. The conclusion I
850
        // have reached is this: "addressing information" must comply with
851
        // RFC 5321 (and in turn RFC 1035), anything that is "semantically
852
        // invisible" must comply only with RFC 5322.
853
        switch ($this->token) {
854
855
            // Comment
856
            case Email::STRING_OPENPARENTHESIS:
857
                if ($this->elementLen === 0) {
858
                    // Comments at the start of the domain are deprecated in the text
859
                    // Comments at the start of a subdomain are obs-domain
860
                    // (http://tools.ietf.org/html/rfc5322#section-3.4.1)
861
                    $this->returnStatus[] = ($this->elementCount === 0) ? Email::DEPREC_CFWS_NEAR_AT : Email::DEPREC_COMMENT;
862
                } else {
863
                    $this->returnStatus[] = Email::CFWS_COMMENT;
864
                    // We can't start a comment in the middle of an element, so this better be the end
865
                    $this->endOrDie = true;
866
                }
867
868
                $this->contextStack[] = $this->context;
869
                $this->context = Email::CONTEXT_COMMENT;
870
                break;
871
872
            // Next dot-atom element
873
            case Email::STRING_DOT:
874
                if ($this->elementLen === 0) {
875
                    // Another dot, already?
876
                    // Fatal error
877
                    $this->returnStatus[] = ($this->elementCount === 0) ? Email::ERR_DOT_START : Email::ERR_CONSECUTIVEDOTS;
878
                } elseif ($this->hyphenFlag) {
879
                    // Previous subdomain ended in a hyphen
880
                    $this->returnStatus[] = Email::ERR_DOMAINHYPHENEND;
881
                } else {
882
                    // Fatal error
883
                    //
884
                    // Nowhere in RFC 5321 does it say explicitly that the
885
                    // domain part of a Mailbox must be a valid domain according
886
                    // to the DNS standards set out in RFC 1035, but this *is*
887
                    // implied in several places. For instance, wherever the idea
888
                    // of host routing is discussed the RFC says that the domain
889
                    // must be looked up in the DNS. This would be nonsense unless
890
                    // the domain was designed to be a valid DNS domain. Hence we
891
                    // must conclude that the RFC 1035 restriction on label length
892
                    // also applies to RFC 5321 domains.
893
                    //
894
                    // http://tools.ietf.org/html/rfc1035#section-2.3.4
895
                    // labels          63 octets or less
896
                    if ($this->elementLen > 63) {
897
                        $this->returnStatus[] = Email::RFC5322_LABEL_TOOLONG;
898
                    }
899
                }
900
901
                // CFWS is OK again now we're at the beginning of an element (although it may be obsolete CFWS)
902
                $this->endOrDie = false;
903
                $this->elementLen = 0;
904
                $this->elementCount++;
905
                $this->atomList[Email::COMPONENT_DOMAIN][$this->elementCount] = '';
906
                $this->parseData[Email::COMPONENT_DOMAIN] .= $this->token;
907
908
                break;
909
910
            // Domain literal
911
            case Email::STRING_OPENSQBRACKET:
912
                if ($this->parseData[Email::COMPONENT_DOMAIN] === '') {
913
                    // Domain literal must be the only component
914
                    $this->endOrDie = true;
915
                    $this->elementLen++;
916
                    $this->contextStack[] = $this->context;
917
                    $this->context = Email::COMPONENT_LITERAL;
918
                    $this->parseData[Email::COMPONENT_DOMAIN] .= $this->token;
919
                    $this->atomList[Email::COMPONENT_DOMAIN][$this->elementCount] .= $this->token;
920
                    $this->parseData[Email::COMPONENT_LITERAL] = '';
921
                } else {
922
                    // Fatal error
923
                    $this->returnStatus[] = Email::ERR_EXPECTING_ATEXT;
924
                }
925
926
                break;
927
928
            // Folding White Space
929
            case Email::STRING_CR:
930
            case Email::STRING_SP:
931
            case Email::STRING_HTAB:
932
                if (($this->token === Email::STRING_CR) && ((++$this->pos === $this->rawLength) || ($this->email[$this->pos] !== Email::STRING_LF))) {
933
                    // Fatal error
934
                    $this->returnStatus[] = Email::ERR_CR_NO_LF;
935
                    break;
936
                }
937
938
                if ($this->elementLen === 0) {
939
                    $this->returnStatus[] = ($this->elementCount === 0) ? Email::DEPREC_CFWS_NEAR_AT : Email::DEPREC_FWS;
940
                } else {
941
                    $this->returnStatus[] = Email::CFWS_FWS;
942
                    // We can't start FWS in the middle of an element, so this better be the end
943
                    $this->endOrDie = true;
944
                }
945
946
                $this->contextStack[] = $this->context;
947
                $this->context = Email::CONTEXT_FWS;
948
                $this->tokenPrior = $this->token;
949
                break;
950
951
            // atext
952
            default:
953
                // RFC 5322 allows any atext...
954
                // http://tools.ietf.org/html/rfc5322#section-3.2.3
955
                //    atext = ALPHA / DIGIT /    ; Printable US-ASCII
956
                //                        "!" / "#" /        ;  characters not including
957
                //                        "$" / "%" /        ;  specials.  Used for atoms.
958
                //                        "&" / "'" /
959
                //                        "*" / "+" /
960
                //                        "-" / "/" /
961
                //                        " = " / "?" /
962
                //                        "^" / "_" /
963
                //                        "`" / "{" /
964
                //                        "|" / "}" /
965
                //                        "~"
966
967
                // But RFC 5321 only allows letter-digit-hyphen to comply with DNS rules (RFCs 1034 & 1123)
968
                // http://tools.ietf.org/html/rfc5321#section-4.1.2
969
                //   sub-domain = Let-dig [Ldh-str]
970
                //
971
                //   Let-dig = ALPHA / DIGIT
972
                //
973
                //   Ldh-str = *( ALPHA / DIGIT / "-" ) Let-dig
974
                //
975
                if ($this->endOrDie) {
976
                    // We have encountered atext where it is no longer valid
977
                    switch ($this->contextPrior) {
978
                        case Email::CONTEXT_COMMENT:
979
                        case Email::CONTEXT_FWS:
980
                            $this->returnStatus[] = Email::ERR_ATEXT_AFTER_CFWS;
981
                            break;
982
                        case Email::COMPONENT_LITERAL:
983
                            $this->returnStatus[] = Email::ERR_ATEXT_AFTER_DOMLIT;
984
                            break;
985
                        default:
986
                            throw new Exception("More atext found where none is allowed, but unrecognised prior context: {$this->contextPrior}");
987
                    }
988
                }
989
990
                $ord = ord($this->token);
991
992
                // Assume this token isn't a hyphen unless we discover it is
993
                $this->hyphenFlag = false;
994
995
                if (($ord < 33) || ($ord > 126) || (!is_bool(strpos(Email::STRING_SPECIALS, $this->token)))) {
0 ignored issues
show
introduced by
The condition is_bool(strpos(Aura\Filt...PECIALS, $this->token)) is always false.
Loading history...
996
                    // Fatal error
997
                    $this->returnStatus[] = Email::ERR_EXPECTING_ATEXT;
998
                } elseif ($this->token === Email::STRING_HYPHEN) {
999
                    if ($this->elementLen === 0) {
1000
                        // Hyphens can't be at the beginning of a subdomain
1001
                        // Fatal error
1002
                        $this->returnStatus[] = Email::ERR_DOMAINHYPHENSTART;
1003
                    }
1004
                    $this->hyphenFlag = true;
1005
                } elseif (!(($ord > 47 && $ord < 58) || ($ord > 64 && $ord < 91) || ($ord > 96 && $ord < 123))) {
1006
                    // Not an RFC 5321 subdomain, but still OK by RFC 5322
1007
                    $this->returnStatus[] = Email::RFC5322_DOMAIN;
1008
                }
1009
1010
                $this->parseData[Email::COMPONENT_DOMAIN] .= $this->token;
1011
                $this->atomList[Email::COMPONENT_DOMAIN][$this->elementCount] .= $this->token;
1012
                $this->elementLen++;
1013
        }
1014
    }
1015
1016
    /**
     *
     * Parse for a literal component.
     *
     * Handles the current token while inside a domain literal ("[...]").
     * A closing bracket ends the literal: if nothing at or above the
     * deprecation threshold has been recorded so far, the collected literal
     * text is classified as an RFC 5321 address literal (IPv4 or IPv6) or as
     * a plain RFC 5322 domain literal, and the enclosing context is restored.
     * A backslash opens a quoted-pair, CR / SP / HTAB open folding white
     * space, and any other token is treated as dtext.
     *
     * @return void
     *
     */
    protected function parseComponentLiteral(): void
    {
        // http://tools.ietf.org/html/rfc5322#section-3.4.1
        //   domain-literal = [CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS]
        //
        //   dtext = %d33-90 /          ; Printable US-ASCII
        //                       %d94-126 /         ;  characters not including
        //                       obs-dtext          ;  "[", "]", or "\"
        //
        //   obs-dtext = obs-NO-WS-CTL / quoted-pair
        switch ($this->token) {

            // End of domain literal
            case Email::STRING_CLOSESQBRACKET:
                if ((int) max($this->returnStatus) < Email::DEPREC) {
                    // Could be a valid RFC 5321 address literal, so let's check

                    // http://tools.ietf.org/html/rfc5321#section-4.1.2
                    //   address-literal = "[" ( IPv4-address-literal /
                    //                    IPv6-address-literal /
                    //                    General-address-literal ) "]"
                    //                    ; See Section 4.1.3
                    //
                    // http://tools.ietf.org/html/rfc5321#section-4.1.3
                    //   IPv4-address-literal = Snum 3("."  Snum)
                    //
                    //   IPv6-address-literal = "IPv6:" IPv6-addr
                    //
                    //   General-address-literal = Standardized-tag ":" 1*dcontent
                    //
                    //   Standardized-tag = Ldh-str
                    //                     ; Standardized-tag MUST be specified in a
                    //                     ; Standards-Track RFC and registered with IANA
                    //
                    //   dcontent = %d33-90 / ; Printable US-ASCII
                    //                  %d94-126 ; excl. "[", "\", "]"
                    //
                    //   Snum = 1*3DIGIT
                    //                  ; representing a decimal integer
                    //                  ; value in the range 0 through 255
                    //
                    //   IPv6-addr = IPv6-full / IPv6-comp / IPv6v4-full / IPv6v4-comp
                    //
                    //   IPv6-hex = 1*4HEXDIG
                    //
                    //   IPv6-full = IPv6-hex 7(":" IPv6-hex)
                    //
                    //   IPv6-comp = [IPv6-hex *5(":" IPv6-hex)] "::"
                    //                  [IPv6-hex *5(":" IPv6-hex)]
                    //                  ; The "::" represents at least 2 16-bit groups of
                    //                  ; zeros.  No more than 6 groups in addition to the
                    //                  ; "::" may be present.
                    //
                    //   IPv6v4-full = IPv6-hex 5(":" IPv6-hex) ":" IPv4-address-literal
                    //
                    //   IPv6v4-comp = [IPv6-hex *3(":" IPv6-hex)] "::"
                    //                  [IPv6-hex *3(":" IPv6-hex) ":"]
                    //                  IPv4-address-literal
                    //                  ; The "::" represents at least 2 16-bit groups of
                    //                  ; zeros.  No more than 4 groups in addition to the
                    //                  ; "::" and IPv4-address-literal may be present.
                    //
                    // is_email() author's note: We can't use ip2long() to validate
                    // IPv4 addresses because it accepts abbreviated addresses
                    // (xxx.xxx.xxx), expanding the last group to complete the address.
                    // filter_var() validates IPv6 address inconsistently (up to PHP 5.3.3
                    // at least) -- see http://bugs.php.net/bug.php?id=53236 for example
                    $max_groups = 8;
                    $matchesIP = [];
                    $index = false;
                    $addressliteral = $this->parseData[Email::COMPONENT_LITERAL];

                    // Extract IPv4 part from the end of the address-literal (if there is one)
                    if (preg_match('/\\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$/', $addressliteral, $matchesIP) > 0) {
                        $index = strrpos($addressliteral, $matchesIP[0]);
                        if ($index !== 0) {
                            // Convert IPv4 part to IPv6 format for further testing:
                            // the dotted quad stands in for two 16-bit groups
                            $addressliteral = substr($addressliteral, 0, $index) . '0:0';
                        }
                    }

                    if ($index === 0) {
                        // Nothing there except a valid IPv4 address, so...
                        $this->returnStatus[] = Email::RFC5321_ADDRESSLITERAL;
                    } elseif (strncasecmp($addressliteral, Email::STRING_IPV6TAG, 5) !== 0) {
                        // No "IPv6:" tag, so this is only a generic domain literal
                        $this->returnStatus[] = Email::RFC5322_DOMAINLITERAL;
                    } else {
                        $IPv6 = substr($addressliteral, 5);
                        // Revision 2.7: Daniel Marschall's new IPv6 testing strategy
                        $matchesIP = explode(Email::STRING_COLON, $IPv6);
                        $groupCount = count($matchesIP);
                        $index = strpos($IPv6, Email::STRING_DOUBLECOLON);

                        if ($index === false) {
                            // We need exactly the right number of groups
                            if ($groupCount !== $max_groups) {
                                $this->returnStatus[] = Email::RFC5322_IPV6_GRPCOUNT;
                            }
                        } else {
                            if ($index !== strrpos($IPv6, Email::STRING_DOUBLECOLON)) {
                                // More than one "::" in the address
                                $this->returnStatus[] = Email::RFC5322_IPV6_2X2XCOLON;
                            } else {
                                if ($index === 0 || $index === (strlen($IPv6) - 2)) {
                                    // RFC 4291 allows :: at the start or end of an address with 7 other groups in addition
                                    $max_groups++;
                                }

                                if ($groupCount > $max_groups) {
                                    $this->returnStatus[] = Email::RFC5322_IPV6_MAXGRPS;
                                } elseif ($groupCount === $max_groups) {
                                    // Eliding a single "::"
                                    $this->returnStatus[] = Email::RFC5321_IPV6DEPRECATED;
                                }
                            }
                        }

                        // Revision 2.7: Daniel Marschall's new IPv6 testing strategy
                        if ((substr($IPv6, 0, 1) === Email::STRING_COLON) && (substr($IPv6, 1, 1) !== Email::STRING_COLON)) {
                            // Address starts with a single colon
                            $this->returnStatus[] = Email::RFC5322_IPV6_COLONSTRT;
                        } elseif ((substr($IPv6, -1) === Email::STRING_COLON) && (substr($IPv6, -2, 1) !== Email::STRING_COLON)) {
                            // Address ends with a single colon
                            $this->returnStatus[] = Email::RFC5322_IPV6_COLONEND;
                        } else {
                            // Collect every group that is NOT zero-to-four hex digits.
                            // The grep runs once; a non-array result (PCRE failure)
                            // counts as "no bad groups", as it always has.
                            $invalidGroups = preg_grep('/^[0-9A-Fa-f]{0,4}$/', $matchesIP, PREG_GREP_INVERT);

                            if (is_array($invalidGroups) && count($invalidGroups) !== 0) {
                                // Check for unmatched characters
                                $this->returnStatus[] = Email::RFC5322_IPV6_BADCHAR;
                            } else {
                                $this->returnStatus[] = Email::RFC5321_ADDRESSLITERAL;
                            }
                        }
                    }
                } else {
                    // Something deprecated or worse was already seen, so this
                    // can only be a generic RFC 5322 domain literal
                    $this->returnStatus[] = Email::RFC5322_DOMAINLITERAL;
                }

                $this->parseData[Email::COMPONENT_DOMAIN] .= $this->token;
                $this->atomList[Email::COMPONENT_DOMAIN][$this->elementCount] .= $this->token;
                $this->elementLen++;
                $this->contextPrior = $this->context;
                $this->context = (int) array_pop($this->contextStack);
                break;

            // Quoted pair (obsolete inside a domain literal)
            case Email::STRING_BACKSLASH:
                $this->returnStatus[] = Email::RFC5322_DOMLIT_OBSDTEXT;
                $this->contextStack[] = $this->context;
                $this->context = Email::CONTEXT_QUOTEDPAIR;
                break;

            // Folding White Space
            case Email::STRING_CR:
            case Email::STRING_SP:
            case Email::STRING_HTAB:
                // A CR must be followed by LF; note that the position is
                // advanced past the CR as part of this check
                if (($this->token === Email::STRING_CR) && ((++$this->pos === $this->rawLength) || ($this->email[$this->pos] !== Email::STRING_LF))) {
                    // Fatal error
                    $this->returnStatus[] = Email::ERR_CR_NO_LF;
                    break;
                }

                $this->returnStatus[] = Email::CFWS_FWS;

                $this->contextStack[] = $this->context;
                $this->context = Email::CONTEXT_FWS;
                $this->tokenPrior = $this->token;
                break;

            // dtext
            default:
                // http://tools.ietf.org/html/rfc5322#section-3.4.1
                //   dtext = %d33-90 /          ; Printable US-ASCII
                //                       %d94-126 /         ;  characters not including
                //                       obs-dtext          ;  "[", "]", or "\"
                //
                //   obs-dtext = obs-NO-WS-CTL / quoted-pair
                //
                //   obs-NO-WS-CTL = %d1-8 /            ; US-ASCII control
                //                       %d11 /             ;  characters that do not
                //                       %d12 /             ;  include the carriage
                //                       %d14-31 /          ;  return, line feed, and
                //                       %d127              ;  white space characters
                $ord = ord($this->token);

                // CR, LF, SP & HTAB have already been parsed above
                if (($ord > 127) || ($ord === 0) || ($this->token === Email::STRING_OPENSQBRACKET)) {
                    $this->returnStatus[] = Email::ERR_EXPECTING_DTEXT; // Fatal error
                    break;
                } elseif (($ord < 33) || ($ord === 127)) {
                    // Control characters are only allowed as obsolete dtext
                    $this->returnStatus[] = Email::RFC5322_DOMLIT_OBSDTEXT;
                }

                $this->parseData[Email::COMPONENT_LITERAL] .= $this->token;
                $this->parseData[Email::COMPONENT_DOMAIN] .= $this->token;
                $this->atomList[Email::COMPONENT_DOMAIN][$this->elementCount] .= $this->token;
                $this->elementLen++;
        }
    }
1217
1218
    /**
     *
     * Parse for a quoted-string context.
     *
     * Handles the current token while inside a quoted-string local part:
     * a backslash opens a quoted-pair, HTAB or CRLF opens folding white
     * space (recorded in the local part as one space), a double quote closes
     * the string and restores the enclosing context, and any other token is
     * taken as qtext.
     *
     * @return void
     *
     */
    protected function parseContextQuotedString(): void
    {
        // http://tools.ietf.org/html/rfc5322#section-3.2.4
        //   quoted-string = [CFWS]
        //                       DQUOTE *([FWS] qcontent) [FWS] DQUOTE
        //                       [CFWS]
        //
        //   qcontent = qtext / quoted-pair

        // Every branch that contributes text does the same three things:
        // extend the parsed local part, extend the current atom, and count
        // one more octet for the current element.
        $appendToLocalPart = function ($text): void {
            $this->parseData[Email::COMPONENT_LOCALPART] .= $text;
            $this->atomList[Email::COMPONENT_LOCALPART][$this->elementCount] .= $text;
            $this->elementLen++;
        };

        switch ($this->token) {

            // Quoted pair
            case Email::STRING_BACKSLASH:
                $this->contextStack[] = $this->context;
                $this->context = Email::CONTEXT_QUOTEDPAIR;
                break;

            // Folding White Space
            // A plain space is an ordinary character inside a quoted string;
            // only HTAB or CRLF count as FWS here.
            case Email::STRING_CR:
            case Email::STRING_HTAB:
                if ($this->token === Email::STRING_CR) {
                    // Step past the CR and insist on an LF right behind it
                    $this->pos++;

                    if (($this->pos === $this->rawLength) || ($this->email[$this->pos] !== Email::STRING_LF)) {
                        // Fatal error
                        $this->returnStatus[] = Email::ERR_CR_NO_LF;
                        break;
                    }
                }

                // http://tools.ietf.org/html/rfc5322#section-3.2.2
                //   Runs of FWS, comment, or CFWS that occur between lexical tokens in a
                //   structured header field are semantically interpreted as a single
                //   space character.

                // http://tools.ietf.org/html/rfc5322#section-3.2.4
                //   the CRLF in any FWS/CFWS that appears within the quoted-string [is]
                //   semantically "invisible" and therefore not part of the quoted-string
                $appendToLocalPart(Email::STRING_SP);

                $this->returnStatus[] = Email::CFWS_FWS;
                $this->contextStack[] = $this->context;
                $this->context = Email::CONTEXT_FWS;
                $this->tokenPrior = $this->token;
                break;

            // End of quoted string
            case Email::STRING_DQUOTE:
                $appendToLocalPart($this->token);

                $this->contextPrior = $this->context;
                $this->context = (int) array_pop($this->contextStack);
                break;

            // qtext
            default:
                // http://tools.ietf.org/html/rfc5322#section-3.2.4
                //   qtext = %d33 /             ; Printable US-ASCII
                //                       %d35-91 /          ;  characters not including
                //                       %d93-126 /         ;  "\" or the quote character
                //                       obs-qtext
                //
                //   obs-qtext = obs-NO-WS-CTL
                //
                //   obs-NO-WS-CTL = %d1-8 /            ; US-ASCII control
                //                       %d11 /             ;  characters that do not
                //                       %d12 /             ;  include the carriage
                //                       %d14-31 /          ;  return, line feed, and
                //                       %d127              ;  white space characters
                $charCode = ord($this->token);

                $isForbidden = ($charCode === 0) || ($charCode === 10) || ($charCode > 127);
                $isObsolete = ($charCode < 32) || ($charCode === 127);

                if ($isForbidden) {
                    // Fatal error
                    $this->returnStatus[] = Email::ERR_EXPECTING_QTEXT;
                } elseif ($isObsolete) {
                    $this->returnStatus[] = Email::DEPREC_QTEXT;
                }

                // The token is recorded even when a status was raised above
                $appendToLocalPart($this->token);
        }

        // http://tools.ietf.org/html/rfc5322#section-3.4.1
        //   If the string can be represented as a dot-atom (that is, it contains
        //   no characters other than atext characters or "." surrounded by atext
        //   characters), then the dot-atom form SHOULD be used and the quoted-
        //   string form SHOULD NOT be used.
        //
        // TODO
        //
    }
1317
1318
    /**
     *
     * Parse for a quoted-pair context.
     *
     * Handles the character that follows a backslash. The character is
     * checked against the quoted-pair grammar, the context that was active
     * before the backslash is restored, and the escaped pair (backslash
     * included) is appended to the local part or the domain, depending on
     * that restored context. Inside a comment the pair is validated but not
     * recorded.
     *
     * @return void
     *
     * @throws Exception if the restored context is not a comment, a quoted
     * string or a domain literal.
     *
     */
    protected function parseContextQuotedPair(): void
    {
        // http://tools.ietf.org/html/rfc5322#section-3.2.1
        //   quoted-pair = ("\" (VCHAR / WSP)) / obs-qp
        //
        //   VCHAR = %d33-126            ; visible (printing) characters
        //   WSP = SP / HTAB           ; white space
        //
        //   obs-qp = "\" (%d0 / obs-NO-WS-CTL / LF / CR)
        //
        //   obs-NO-WS-CTL = %d1-8 /            ; US-ASCII control
        //                       %d11 /             ;  characters that do not
        //                       %d12 /             ;  include the carriage
        //                       %d14-31 /          ;  return, line feed, and
        //                       %d127              ;  white space characters
        //
        // i.e. obs-qp = "\" (%d0-8, %d10-31 / %d127)
        $ord = ord($this->token);

        if ($ord > 127) {
            // Fatal error
            $this->returnStatus[] = Email::ERR_EXPECTING_QPAIR;
        } elseif ((($ord < 32) && ($ord !== 9)) || ($ord === 127)) {
            // SP & HTAB are allowed; every other control character,
            // %d31 included, is only valid as an obsolete quoted-pair,
            // so it is deprecated rather than fatal
            $this->returnStatus[] = Email::DEPREC_QP;
        }

        // At this point we know where this qpair occurred so
        // we could check to see if the character actually
        // needed to be quoted at all.
        // http://tools.ietf.org/html/rfc5321#section-4.1.2
        //   the sending system SHOULD transmit the
        //   form that uses the minimum quoting possible.
        //
        // TODO: check whether the character needs to be quoted (escaped) in this context
        //
        $this->contextPrior = $this->context;
        $this->context = (int) array_pop($this->contextStack); // End of qpair
        // From here on the token is the whole pair, backslash included
        $this->token = Email::STRING_BACKSLASH . $this->token;

        switch ($this->context) {
            case Email::CONTEXT_COMMENT:
                // Comment content is not recorded
                break;
            case Email::CONTEXT_QUOTEDSTRING:
                $this->parseData[Email::COMPONENT_LOCALPART] .= $this->token;
                $this->atomList[Email::COMPONENT_LOCALPART][$this->elementCount] .= $this->token;
                // The maximum sizes specified by RFC 5321 are octet counts, so we must include the backslash
                $this->elementLen += 2;
                break;
            case Email::COMPONENT_LITERAL:
                $this->parseData[Email::COMPONENT_DOMAIN] .= $this->token;
                $this->atomList[Email::COMPONENT_DOMAIN][$this->elementCount] .= $this->token;
                // The maximum sizes specified by RFC 5321 are octet counts, so we must include the backslash
                $this->elementLen += 2;
                break;
            default:
                throw new Exception("Quoted pair logic invoked in an invalid context: {$this->context}");
        }
    }
1384
1385
    /**
     *
     * Parse for a comment context.
     *
     * Handles the current token while inside a parenthesised comment: an
     * opening parenthesis nests another comment, a closing parenthesis
     * restores the enclosing context, a backslash opens a quoted-pair,
     * CR / SP / HTAB open folding white space, and any other token is
     * checked as ctext. Nothing is appended to the parsed address data here.
     *
     * @return void
     *
     */
    protected function parseContextComment(): void
    {
        // http://tools.ietf.org/html/rfc5322#section-3.2.2
        //   comment = "(" *([FWS] ccontent) [FWS] ")"
        //
        //   ccontent = ctext / quoted-pair / comment
        switch ($this->token) {

            // Nested comment
            case Email::STRING_OPENPARENTHESIS:
                // Nesting is allowed: remember the current (comment) context
                // and stay in comment mode
                $this->contextStack[] = $this->context;
                $this->context = Email::CONTEXT_COMMENT;
                break;

            // End of comment
            case Email::STRING_CLOSEPARENTHESIS:
                $this->contextPrior = $this->context;
                $this->context = (int) array_pop($this->contextStack);

                // http://tools.ietf.org/html/rfc5322#section-3.2.2
                //   Runs of FWS, comment, or CFWS that occur between lexical tokens in a
                //   structured header field are semantically interpreted as a single
                //   space character.
                //
                // is_email() author's note: This *cannot* mean that we must add a
                // space to the address wherever CFWS appears. This would result in
                // any addr-spec that had CFWS outside a quoted string being invalid
                // for RFC 5321.
                //
                // if (($this->context === Email::COMPONENT_LOCALPART) || ($this->context === Email::COMPONENT_DOMAIN)) {
                //     $this->parseData[$this->context] .= Email::STRING_SP;
                //     $this->atomList[$this->context][$this->elementCount] .= Email::STRING_SP;
                //     $this->elementLen++;
                // }

                break;

            // Quoted pair
            case Email::STRING_BACKSLASH:
                $this->contextStack[] = $this->context;
                $this->context = Email::CONTEXT_QUOTEDPAIR;
                break;

            // Folding White Space
            case Email::STRING_CR:
            case Email::STRING_SP:
            case Email::STRING_HTAB:
                if ($this->token === Email::STRING_CR) {
                    // Step past the CR and insist on an LF right behind it
                    $this->pos++;

                    if (($this->pos === $this->rawLength) || ($this->email[$this->pos] !== Email::STRING_LF)) {
                        // Fatal error
                        $this->returnStatus[] = Email::ERR_CR_NO_LF;
                        break;
                    }
                }

                $this->returnStatus[] = Email::CFWS_FWS;

                $this->contextStack[] = $this->context;
                $this->context = Email::CONTEXT_FWS;
                $this->tokenPrior = $this->token;
                break;

            // ctext
            default:
                // http://tools.ietf.org/html/rfc5322#section-3.2.3
                //   ctext = %d33-39 /          ; Printable US-ASCII
                //                       %d42-91 /          ;  characters not including
                //                       %d93-126 /         ;  "(", ")", or "\"
                //                       obs-ctext
                //
                //   obs-ctext = obs-NO-WS-CTL
                //
                //   obs-NO-WS-CTL = %d1-8 /            ; US-ASCII control
                //                       %d11 /             ;  characters that do not
                //                       %d12 /             ;  include the carriage
                //                       %d14-31 /          ;  return, line feed, and
                //                       %d127              ;  white space characters
                $charCode = ord($this->token);

                $isForbidden = ($charCode === 0) || ($charCode === 10) || ($charCode > 127);
                $isObsolete = ($charCode < 32) || ($charCode === 127);

                if ($isForbidden) {
                    $this->returnStatus[] = Email::ERR_EXPECTING_CTEXT; // Fatal error
                } elseif ($isObsolete) {
                    $this->returnStatus[] = Email::DEPREC_CTEXT;
                }

                break;
        }
    }
1478
1479
    /**
1480
     *
1481
     * Parse for a folding-white-space context.
1482
     *
1483
     * @return void
1484
     *
1485
     */
1486
    protected function parseContextFws(): void
    {
        // Handles one token while the parser is inside folding white space,
        // and hands control back to the enclosing context as soon as a token
        // that is not CR, SP or HTAB shows up.
        //
        // http://tools.ietf.org/html/rfc5322#section-3.2.2
        //   FWS = ([*WSP CRLF] 1*WSP) /  obs-FWS   ; Folding white space
        //
        // But note the erratum:
        // http://www.rfc-editor.org/errata_search.php?rfc=5322&eid=1908
        //   In the obsolete syntax, any amount of folding white space MAY be
        //   inserted where the obs-FWS rule is allowed.  This creates the
        //   possibility of having two consecutive "folds" in a line, and
        //   therefore the possibility that a line which makes up a folded
        //   header field could be composed entirely of white space.
        //
        //   obs-FWS = 1*([CRLF] WSP)

        // The CR branch below also steps over the LF that follows it, so a
        // prior token of CR means a fold was just consumed.
        $followsFold = ($this->tokenPrior === Email::STRING_CR);

        if ($followsFold) {
            if ($this->token === Email::STRING_CR) {
                // Fatal error: a fold directly followed by another CR.
                // Note that tokenPrior is deliberately left untouched here.
                $this->returnStatus[] = Email::ERR_FWS_CRLF_X2;
                return;
            }

            // Count the folds seen in this run; the counter springs into
            // existence at 1 on the first fold.
            $this->crlfCount = ($this->crlfCount ?? 0) + 1;

            if ($this->crlfCount > 1) {
                // Multiple folds = obsolete FWS
                $this->returnStatus[] = Email::DEPREC_FWS;
            }
        }

        switch ($this->token) {
            case Email::STRING_CR:
                // Advance onto the character after the CR; it must exist and
                // it must be an LF.
                $this->pos++;
                $lfMissing = ($this->pos === $this->rawLength)
                    || ($this->email[$this->pos] !== Email::STRING_LF);

                if ($lfMissing) {
                    // Fatal error
                    $this->returnStatus[] = Email::ERR_CR_NO_LF;
                }
                break;

            case Email::STRING_SP:
            case Email::STRING_HTAB:
                // Plain white space: stay in the FWS context.
                break;

            default:
                if ($followsFold) {
                    // Fatal error: the fold was not followed by white space.
                    $this->returnStatus[] = Email::ERR_FWS_CRLF_END;
                    break;
                }

                // End of FWS: forget the fold counter and restore the context
                // that was pushed when the FWS started.
                unset($this->crlfCount);

                $this->contextPrior = $this->context;
                $this->context = (int) array_pop($this->contextStack);

                // http://tools.ietf.org/html/rfc5322#section-3.2.2
                //   Runs of FWS, comment, or CFWS that occur between lexical
                //   tokens in a structured header field are semantically
                //   interpreted as a single space character.
                //
                // is_email() author's note: This *cannot* mean that we must
                // add a space to the address wherever CFWS appears. This would
                // result in any addr-spec that had CFWS outside a quoted
                // string being invalid for RFC 5321. Hence nothing is appended
                // to the parsed local part or domain here.

                // Look at this token again in the parent context
                $this->pos--;
        }

        $this->tokenPrior = $this->token;
    }
1564
1565
    /**
1566
     *
1567
     * Final wrap-up parsing.
1568
     *
1569
     * @return void
1570
     *
1571
     */
1572
    protected function parseFinal(): void
    {
        // Runs the end-of-input sanity checks. At most ONE status is appended:
        // the checks are ordered and the first one that fires wins.

        // Skip the checks entirely once a status at or above the RFC5322
        // level has been recorded.
        if ((int) max($this->returnStatus) >= Email::RFC5322) {
            return;
        }

        // Contexts that must not still be open when the input ends, each
        // paired with the fatal error reported for it. Order matters.
        $openContextErrors = [
            [Email::CONTEXT_QUOTEDSTRING, Email::ERR_UNCLOSEDQUOTEDSTR],
            [Email::CONTEXT_QUOTEDPAIR, Email::ERR_BACKSLASHEND],
            [Email::CONTEXT_COMMENT, Email::ERR_UNCLOSEDCOMMENT],
            [Email::COMPONENT_LITERAL, Email::ERR_UNCLOSEDDOMLIT],
        ];

        foreach ($openContextErrors as [$openContext, $fatalError]) {
            if ($this->context === $openContext) {
                $this->returnStatus[] = $fatalError;
                return;
            }
        }

        if ($this->token === Email::STRING_CR) {
            // Fatal error: the last token seen was a CR
            $this->returnStatus[] = Email::ERR_FWS_CRLF_END;
            return;
        }

        $domain = $this->parseData[Email::COMPONENT_DOMAIN];

        if ($domain === '') {
            // Fatal error
            $this->returnStatus[] = Email::ERR_NODOMAIN;
            return;
        }

        if ($this->elementLen === 0) {
            // Fatal error: the final element is empty
            $this->returnStatus[] = Email::ERR_DOT_END;
            return;
        }

        if ($this->hyphenFlag) {
            // Fatal error
            $this->returnStatus[] = Email::ERR_DOMAINHYPHENEND;
            return;
        }

        if (strlen($domain) > 255) {
            // http://tools.ietf.org/html/rfc5321#section-4.5.3.1.2
            //   The maximum total length of a domain name or number is 255 octets.
            $this->returnStatus[] = Email::RFC5322_DOMAIN_TOOLONG;
            return;
        }

        $address = $this->parseData[Email::COMPONENT_LOCALPART] . Email::STRING_AT . $domain;

        if (strlen($address) > 254) {
            // http://tools.ietf.org/html/rfc5321#section-4.1.2
            //   Forward-path = Path
            //
            //   Path = "<" [ A-d-l ":" ] Mailbox ">"
            //
            // http://tools.ietf.org/html/rfc5321#section-4.5.3.1.3
            //   The maximum total length of a reverse-path or forward-path is 256
            //   octets (including the punctuation and element separators).
            //
            // Thus, even without (obsolete) routing information, the Mailbox can
            // only be 254 characters long. This is confirmed by this verified
            // erratum to RFC 3696:
            //
            // http://www.rfc-editor.org/errata_search.php?rfc=3696&eid=1690
            //   However, there is a restriction in RFC 2821 on the length of an
            //   address in MAIL and RCPT commands of 254 characters.  Since addresses
            //   that do not fit in those fields are not normally useful, the upper
            //   limit on address lengths should normally be considered to be 254.
            $this->returnStatus[] = Email::RFC5322_TOOLONG;
            return;
        }

        if ($this->elementLen > 63) {
            // http://tools.ietf.org/html/rfc1035#section-2.3.4
            // labels          63 octets or less
            $this->returnStatus[] = Email::RFC5322_LABEL_TOOLONG;
        }
    }
1631
1632
    /**
1633
     *
1634
     * Make a DNS check on the MX record, if requested.
1635
     *
1636
     * @return void
1637
     *
1638
     */
1639
    protected function checkDns(): void
    {
        // Looks the parsed domain up in DNS and records a DNSWARN_* status
        // when no MX (or, failing that, no A/CNAME) record is returned.
        // Sets the dnsChecked flag only when an MX record was found.

        // Check DNS? Only when requested, ...
        if (!$this->checkDns) {
            return;
        }

        // ... only while every status recorded so far is below DNSWARN, ...
        if ((int) max($this->returnStatus) >= Email::DNSWARN) {
            return;
        }

        // ... and only when this PHP build offers dns_get_record().
        if (!function_exists('dns_get_record')) {
            return;
        }

        // http://tools.ietf.org/html/rfc5321#section-2.3.5
        //   Names that can
        //   be resolved to MX RRs or address (i.e., A or AAAA) RRs (as discussed
        //   in Section 5) are permitted, as are CNAME RRs whose targets can be
        //   resolved, in turn, to MX or address RRs.
        //
        // http://tools.ietf.org/html/rfc5321#section-5.1
        //   The lookup first attempts to locate an MX record associated with the
        //   name.  If a CNAME record is found, the resulting name is processed as
        //   if it were the initial name. ... If an empty list of MXs is returned,
        //   the address is treated as if it was associated with an implicit MX
        //   RR, with a preference of 0, pointing to that host.
        //
        // is_email() author's note: We will regard the existence of a CNAME to be
        // sufficient evidence of the domain's existence. For performance reasons
        // we will not repeat the DNS lookup for the CNAME's target, but we will
        // raise a warning because we didn't immediately find an MX record.

        if ($this->elementCount === 0) {
            // Checking TLD DNS seems to work only if you explicitly check from
            // the root. Note that the trailing dot is written back into the
            // stored domain, not just used for the query.
            $this->parseData[Email::COMPONENT_DOMAIN] .= '.';
        }

        $domain = $this->parseData[Email::COMPONENT_DOMAIN];

        // Not using checkdnsrr because of a suspected bug in PHP 5.3
        // (http://bugs.php.net/bug.php?id=51844)
        $mxRecords = @dns_get_record($domain, DNS_MX);

        if ($mxRecords === false) {
            // Domain can't be found in DNS
            $this->returnStatus[] = Email::DNSWARN_NO_RECORD;
            return;
        }

        $mxTotal = (is_array($mxRecords) || $mxRecords instanceof Countable) ? count($mxRecords) : 0;

        if ($mxTotal !== 0) {
            $this->dnsChecked = true;
            return;
        }

        // MX-record for domain can't be found
        $this->returnStatus[] = Email::DNSWARN_NO_MX_RECORD;

        // NOTE(review): this fallback asks for A and CNAME records only, while
        // the RFC text quoted above also names AAAA -- confirm whether that
        // omission is intended.
        $hostRecords = @dns_get_record($domain, DNS_A | DNS_CNAME);
        $hostTotal = (is_array($hostRecords) || $hostRecords instanceof Countable) ? count($hostRecords) : 0;

        if ($hostTotal === 0) {
            // No usable records for the domain can be found
            $this->returnStatus[] = Email::DNSWARN_NO_RECORD;
        }
    }
1685
1686
    /**
1687
     *
1688
     * Check the top-level domain of the address.
1689
     *
1690
     * @return void
1691
     *
1692
     */
1693
    protected function checkTld(): void
    {
        // Flags addresses whose domain is a bare TLD, and addresses whose
        // final domain label starts with a digit.
        //
        // Check for TLD addresses
        // -----------------------
        // TLD addresses are specifically allowed in RFC 5321 but they are
        // unusual to say the least. We will allocate a separate
        // status to these addresses on the basis that they are more likely
        // to be typos than genuine addresses (unless we've already
        // established that the domain does have an MX record)
        //
        // http://tools.ietf.org/html/rfc5321#section-2.3.5
        //   In the case
        //   of a top-level domain used by itself in an email address, a single
        //   string is used without any dots.  This makes the requirement,
        //   described in more detail below, that only fully-qualified domain
        //   names appear in SMTP transactions on the public Internet,
        //   particularly important where top-level domains are involved.
        //
        // TLD format
        // ----------
        // The format of TLDs has changed a number of times. The standards
        // used by IANA have been largely ignored by ICANN, leading to
        // confusion over the standards being followed. These are not defined
        // anywhere, except as a general component of a DNS host name (a label).
        // However, this could potentially lead to 123.123.123.123 being a
        // valid DNS name (rather than an IP address) and thereby creating
        // an ambiguity. The most authoritative statement on TLD formats that
        // the author can find is in a (rejected!) erratum to RFC 1123
        // submitted by John Klensin, the author of RFC 5321:
        //
        // http://www.rfc-editor.org/errata_search.php?rfc=1123&eid=1353
        //   However, a valid host name can never have the dotted-decimal
        //   form #.#.#.#, since this change does not permit the highest-level
        //   component label to start with a digit even if it is not all-numeric.

        // Nothing to do when the DNS check already succeeded, or when a status
        // at or above DNSWARN has been recorded.
        if ($this->dnsChecked || ((int) max($this->returnStatus) >= Email::DNSWARN)) {
            return;
        }

        // An element count of zero means the domain is a single label.
        if ($this->elementCount === 0) {
            $this->returnStatus[] = Email::RFC5321_TLD;
        }

        // Inspect the first character of the last domain label.
        $finalLabel = $this->atomList[Email::COMPONENT_DOMAIN][$this->elementCount];

        if (is_numeric($finalLabel[0])) {
            $this->returnStatus[] = Email::RFC5321_TLDNUMERIC;
        }
    }
1737
1738
    /**
1739
     *
1740
     * Sets the final status and return status.
1741
     *
1742
     * @return void
1743
     *
1744
     */
1745
    protected function finalStatus(): void
    {
        // Collapses the collected statuses into the final result:
        //   - returnStatus / parseData['status'] receive the de-duplicated list
        //   - finalStatus receives the highest status, or Email::VALID when
        //     that highest status is below the configured threshold

        $statuses = array_unique($this->returnStatus);

        // Take the maximum BEFORE the first entry is dropped below.
        $highest = (int) max($statuses);

        if (count($statuses) !== 1) {
            // remove redundant Email::VALID
            // (presumably the list is seeded with Email::VALID elsewhere, so
            // the first entry is the one to drop -- verify against the setup code)
            array_shift($statuses);
        }

        $this->returnStatus = $statuses;
        $this->parseData['status'] = $statuses;

        $this->finalStatus = ($highest < $this->threshold) ? Email::VALID : $highest;
    }
1761
}
1762