Passed
Push — master ( b2a72e...3343ee )
by John
03:55
created

Rfc5322Validator::validateQuotedStringContent()   B

Complexity

Conditions 7
Paths 6

Size

Total Lines 32
Code Lines 15

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 15
c 0
b 0
f 0
dl 0
loc 32
rs 8.8333
cc 7
nc 6
nop 1
1
<?php
2
3
declare(strict_types=1);
4
5
namespace EmailValidator\Validator;
6
7
use EmailValidator\EmailAddress;
8
9
/**
10
 * Validates email addresses according to RFC 5322 standards
11
 *
12
 * This validator implements strict validation rules from RFC 5322 (superseding RFC 822),
13
 * including proper handling of:
14
 * - Quoted strings in local part
15
 * - Comments
16
 * - Domain literals
17
 * - Special characters
18
 * - Length restrictions
19
 */
20
class Rfc5322Validator extends AValidator
21
{
22
    // Maximum lengths in octets (local part and domain: RFC 5321 section 4.5.3.1; domain label: RFC 1035)
23
    private const MAX_LOCAL_PART_LENGTH = 64;
24
    private const MAX_DOMAIN_LABEL_LENGTH = 63;
25
    private const MAX_DOMAIN_LENGTH = 255;
26
27
    // Character sets for unquoted local part
28
    private const LOCAL_PART_ALLOWED_CHARS = '!#$%&\'*+-/=?^_`{|}~.';
29
30
    /**
     * Checks an email address against this validator's RFC 5322 rules.
     *
     * Both parts are read from the address object first; the address is
     * rejected when either part is missing (null). Otherwise the local part
     * is checked first and the domain is only checked if the local part passed.
     *
     * @param EmailAddress $email The email address to validate
     * @return bool True when both the local part and the domain are valid
     */
    public function validate(EmailAddress $email): bool
    {
        $local = $email->getLocalPart();
        $host = $email->getDomain();

        if ($local !== null && $host !== null) {
            return $this->validateLocalPart($local) && $this->validateDomain($host);
        }

        return false;
    }
47
48
    /**
     * Validates the local part (the text before the "@").
     *
     * A local part that is too long or empty is rejected. A local part that
     * begins with a double quote is validated as a quoted string; anything
     * else is validated as a dot-atom.
     *
     * @param string $localPart The local part to validate
     * @return bool True if the local part is valid
     */
    private function validateLocalPart(string $localPart): bool
    {
        $fitsLimit = $this->validateLocalPartLength($localPart);

        // The emptiness check must precede isQuotedString(), which inspects the first byte
        if (!$fitsLimit || $localPart === '') {
            return false;
        }

        return $this->isQuotedString($localPart)
            ? $this->validateQuotedString($localPart)
            : $this->validateDotAtom($localPart);
    }
74
75
    /**
     * Checks that a local part does not exceed MAX_LOCAL_PART_LENGTH.
     *
     * The length is measured in bytes (strlen), not in characters.
     *
     * @param string $localPart The local part to measure
     * @return bool True if the byte length is within the limit
     */
    private function validateLocalPartLength(string $localPart): bool
    {
        $byteLength = strlen($localPart);

        return !($byteLength > self::MAX_LOCAL_PART_LENGTH);
    }
85
86
    /**
     * Checks whether a local part should be treated as a quoted string.
     *
     * Only the first byte is inspected; whether the string is also properly
     * terminated is checked separately. An empty string is simply reported as
     * "not quoted" instead of triggering an uninitialized-string-offset
     * diagnostic.
     *
     * @param string $localPart The local part to check
     * @return bool True if the local part starts with a double quote
     */
    private function isQuotedString(string $localPart): bool
    {
        // "??" makes the offset read safe when the string is empty
        return ($localPart[0] ?? '') === '"';
    }
96
97
    /**
     * Validates an unquoted (dot-atom) local part.
     *
     * The local part is split on "." and every resulting piece must be a
     * valid atom. Leading, trailing, or doubled dots produce an empty piece,
     * which the atom check rejects.
     *
     * @param string $localPart The unquoted local part to validate
     * @return bool True if every dot-separated piece is a valid atom
     */
    private function validateDotAtom(string $localPart): bool
    {
        foreach (explode('.', $localPart) as $piece) {
            if ($this->validateAtom($piece)) {
                continue;
            }

            return false;
        }

        return true;
    }
117
118
    /**
     * Validates a single atom of a dot-atom local part.
     *
     * An atom must be non-empty and consist solely of ASCII letters, digits,
     * and the characters !#$%&'*+-/=?^_`{|}~ .
     *
     * @param string $atom The atom to validate
     * @return bool True if the atom is valid
     */
    private function validateAtom(string $atom): bool
    {
        if ($atom === '') {
            return false;
        }

        // \z anchors at the very end of the subject. A plain "$" would also
        // match just before a trailing newline and let "user\n" through.
        return preg_match('/^[a-zA-Z0-9!#$%&\'*+\-\/=?^_`{|}~]+\z/', $atom) === 1;
    }
133
134
    /**
     * Validates a quoted-string local part.
     *
     * The value must be wrapped in double quotes. The text between the outer
     * quotes is then validated, except that an empty pair of quotes is
     * accepted outright.
     *
     * @param string $localPart The quoted string to validate (including outer quotes)
     * @return bool True if the quoted string is valid
     */
    private function validateQuotedString(string $localPart): bool
    {
        if (!$this->hasValidQuotes($localPart)) {
            return false;
        }

        // Strip the opening and closing quote
        $inner = substr($localPart, 1, -1);

        return $inner === '' || $this->validateQuotedStringContent($inner);
    }
157
158
    /**
     * Checks that a string both starts and ends with a double quote.
     *
     * At least two quote characters are required, so a lone '"' fails. As
     * before, "." does not match a newline, so a value containing a line
     * break is rejected here.
     *
     * @param string $localPart The quoted string to check
     * @return bool True if the opening and closing quotes are present
     */
    private function hasValidQuotes(string $localPart): bool
    {
        // The D modifier makes "$" match only at the very end of the subject.
        // Without it, a value such as "abc\" followed by a newline counted as
        // closed, and stripping the outer characters then removed the newline
        // rather than a closing quote.
        return preg_match('/^".*"$/D', $localPart) === 1;
    }
168
169
    /**
     * Validates the text between the outer quotes of a quoted string.
     *
     * Rules applied byte by byte:
     * - every byte must be in the range 32..126, even when escaped;
     * - a backslash escapes the byte that follows it;
     * - a double quote is only allowed when escaped;
     * - the content must not end with a dangling backslash.
     *
     * @param string $content The content to validate (without outer quotes)
     * @return bool True if the content is valid
     */
    private function validateQuotedStringContent(string $content): bool
    {
        $escaped = false;
        $length = strlen($content);
        $position = 0;

        while ($position < $length) {
            $byte = $content[$position++];
            $ordinal = ord($byte);

            // Control bytes and non-ASCII bytes are rejected unconditionally
            if ($ordinal < 32 || $ordinal > 126) {
                return false;
            }

            if ($escaped) {
                // This byte is consumed by the preceding backslash
                $escaped = false;
            } elseif ($byte === '\\') {
                $escaped = true;
            } elseif ($byte === '"') {
                // A bare quote would terminate the string early
                return false;
            }
        }

        // A trailing backslash has nothing left to escape
        return !$escaped;
    }
208
209
    /**
     * Validates the domain part (the text after the "@").
     *
     * An empty or over-long domain is rejected. A domain that begins with "["
     * is validated as a domain literal; anything else is validated as a
     * dot-separated domain name.
     *
     * @param string $domain The domain to validate
     * @return bool True if the domain is valid
     */
    private function validateDomain(string $domain): bool
    {
        // The emptiness check must precede isDomainLiteral(), which inspects the first byte
        if ($domain === '' || !$this->validateDomainLength($domain)) {
            return false;
        }

        return $this->isDomainLiteral($domain)
            ? $this->validateDomainLiteral($domain)
            : $this->validateDomainName($domain);
    }
235
236
    /**
     * Checks that a domain does not exceed MAX_DOMAIN_LENGTH.
     *
     * The length is measured in bytes (strlen), not in characters.
     *
     * @param string $domain The domain to measure
     * @return bool True if the byte length is within the limit
     */
    private function validateDomainLength(string $domain): bool
    {
        $byteLength = strlen($domain);

        return !($byteLength > self::MAX_DOMAIN_LENGTH);
    }
246
247
    /**
     * Checks whether a domain should be treated as a domain literal.
     *
     * Only the first byte is inspected; the closing bracket and the address
     * inside are checked separately. An empty string is simply reported as
     * "not a literal" instead of triggering an uninitialized-string-offset
     * diagnostic.
     *
     * @param string $domain The domain to check
     * @return bool True if the domain starts with "["
     */
    private function isDomainLiteral(string $domain): bool
    {
        // "??" makes the offset read safe when the string is empty
        return ($domain[0] ?? '') === '[';
    }
257
258
    /**
     * Validates a dot-separated domain name.
     *
     * The name must contain at least two labels (so a bare host name without
     * a dot is rejected), and every label must pass the label check.
     *
     * @param string $domain The domain name to validate
     * @return bool True if the domain name is valid
     */
    private function validateDomainName(string $domain): bool
    {
        $parts = explode('.', $domain);

        // No second element means the domain contained no dot at all
        if (!isset($parts[1])) {
            return false;
        }

        foreach ($parts as $part) {
            if ($this->validateDomainLabel($part)) {
                continue;
            }

            return false;
        }

        return true;
    }
283
284
    /**
     * Validates a single label of a domain name.
     *
     * The checks run in this order and stop at the first failure: length,
     * first/last character, overall character format, and absence of a
     * doubled hyphen. The length check runs first because it also rejects the
     * empty label, which the boundary check cannot index into.
     *
     * @param string $label The domain label to validate
     * @return bool True if the domain label is valid
     */
    private function validateDomainLabel(string $label): bool
    {
        return $this->validateDomainLabelLength($label)
            && $this->hasValidLabelBoundaries($label)
            && $this->hasValidLabelFormat($label)
            && !$this->hasConsecutiveHyphens($label);
    }
310
311
    /**
     * Checks that a domain label is non-empty and no longer than
     * MAX_DOMAIN_LABEL_LENGTH.
     *
     * The length is measured in bytes (strlen).
     *
     * @param string $label The domain label to measure
     * @return bool True if the label has between 1 and the maximum number of bytes
     */
    private function validateDomainLabelLength(string $label): bool
    {
        $byteLength = strlen($label);

        return $byteLength > 0 && $byteLength <= self::MAX_DOMAIN_LABEL_LENGTH;
    }
321
322
    /**
     * Checks that a domain label begins and ends with an alphanumeric character.
     *
     * The label's first byte is read by index, so callers are expected to
     * pass a non-empty label.
     *
     * @param string $label The domain label to check
     * @return bool True if both the first and the last character are alphanumeric
     */
    private function hasValidLabelBoundaries(string $label): bool
    {
        $first = $label[0];
        $last = substr($label, -1);

        return ctype_alnum($first) && ctype_alnum($last);
    }
332
333
    /**
     * Checks that a domain label uses only letters, digits, and hyphens, and
     * that it neither starts nor ends with a hyphen.
     *
     * Single-character labels such as the "t" in "t.co" are accepted: the
     * part after the first character is optional. Previously the pattern
     * demanded at least two characters, which rejected such domains.
     *
     * @param string $label The domain label to check
     * @return bool True if the format is valid
     */
    private function hasValidLabelFormat(string $label): bool
    {
        // \z anchors at the very end of the subject. A plain "$" would also
        // match just before a trailing newline.
        return preg_match('/^[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?\z/', $label) === 1;
    }
343
344
    /**
     * Reports whether a domain label contains two hyphens in a row.
     *
     * Note that a true result means the label is NOT acceptable to the caller.
     *
     * @param string $label The domain label to inspect
     * @return bool True if "--" occurs anywhere in the label
     */
    private function hasConsecutiveHyphens(string $label): bool
    {
        $position = strpos($label, '--');

        // strpos() yields false only when the needle is absent; offset 0 is a hit
        return $position !== false;
    }
354
355
    /**
     * Validates a domain literal (an IP address enclosed in square brackets).
     *
     * The text between the brackets is treated as IPv6 when it starts with
     * the case-insensitive tag "IPv6:", and as IPv4 otherwise.
     *
     * @param string $domain The domain literal to validate (including brackets)
     * @return bool True if the domain literal is valid
     */
    private function validateDomainLiteral(string $domain): bool
    {
        // The D modifier makes "$" match only at the very end of the subject.
        // Without it, "[127.0.0.1]" followed by a newline was accepted.
        if (preg_match('/^\[(.*)]$/D', $domain, $matches) !== 1) {
            return false;
        }

        $content = $matches[1];

        // Tagged IPv6 literal; the tag is stripped by validateIPv6()
        if (stripos($content, 'IPv6:') === 0) {
            return $this->validateIPv6($content);
        }

        // Everything else is treated as IPv4
        return $this->validateIPv4($content);
    }
378
379
    /**
     * Validates the IPv6 form of a domain literal.
     *
     * The first five bytes (the "IPv6:" tag) are dropped and surrounding
     * whitespace is trimmed. The address is then expanded to eight groups,
     * each group must be 1-4 hexadecimal digits, and the zero-padded result
     * is handed to filter_var() for a final check.
     *
     * @param string $content The IPv6 address to validate (including 'IPv6:' prefix)
     * @return bool True if the IPv6 address is valid
     */
    private function validateIPv6(string $content): bool
    {
        $address = trim(substr($content, 5));

        $groups = $this->parseIPv6Segments($address);
        if ($groups === null) {
            return false;
        }

        $padded = [];
        foreach ($groups as $group) {
            if (preg_match('/^[0-9A-Fa-f]{1,4}$/', $group) !== 1) {
                return false;
            }

            // Normalise every group to four hex digits
            $padded[] = str_pad($group, 4, '0', STR_PAD_LEFT);
        }

        $canonical = implode(':', $padded);

        return filter_var($canonical, FILTER_VALIDATE_IP, FILTER_FLAG_IPV6) !== false;
    }
410
411
    /**
     * Splits an IPv6 address into exactly eight groups, expanding "::".
     *
     * Without "::" the address must already consist of eight colon-separated
     * groups. With "::" (allowed once) the gap is filled with '0' groups, and
     * the explicitly written groups must number fewer than eight. The groups
     * themselves are not validated here.
     *
     * @param string $ipv6 The IPv6 address to parse (without the 'IPv6:' tag)
     * @return array|null List of eight group strings, or null if the group count is invalid
     */
    private function parseIPv6Segments(string $ipv6): ?array
    {
        // Uncompressed notation: exactly eight groups are required
        if (strpos($ipv6, '::') === false) {
            $segments = explode(':', $ipv6);

            return count($segments) === 8 ? $segments : null;
        }

        // Only one "::" is allowed
        if (substr_count($ipv6, '::') > 1) {
            return null;
        }

        $parts = explode('::', $ipv6);
        if (count($parts) !== 2) {
            return null;
        }

        // Compare against '' explicitly: a side consisting of the single group
        // "0" is a real group. A truthiness test would treat "0" as absent,
        // which let "1:2:3:4:5:6:7::0" through.
        $leftSegments = $parts[0] !== '' ? explode(':', $parts[0]) : [];
        $rightSegments = $parts[1] !== '' ? explode(':', $parts[1]) : [];

        // "::" must stand in for at least one group
        $totalSegments = count($leftSegments) + count($rightSegments);
        if ($totalSegments >= 8) {
            return null;
        }

        // Fill the gap with zero groups
        $middleSegments = array_fill(0, 8 - $totalSegments, '0');

        return array_merge($leftSegments, $middleSegments, $rightSegments);
    }
452
453
    /**
     * Validates the IPv4 form of a domain literal.
     *
     * Surrounding whitespace is trimmed, the address is split into four
     * octets, leading zeros are removed from each octet, and the normalised
     * dotted form is handed to filter_var() for a final check.
     *
     * @param string $content The IPv4 address to validate
     * @return bool True if the IPv4 address is valid
     */
    private function validateIPv4(string $content): bool
    {
        $candidate = trim($content);

        $parts = $this->parseIPv4Octets($candidate);
        if ($parts === null) {
            return false;
        }

        $normalized = [];
        foreach ($parts as $part) {
            // An octet made only of zeros collapses to a single "0"
            $stripped = ltrim($part, '0');
            $normalized[] = $stripped !== '' ? $stripped : '0';
        }

        $canonical = implode('.', $normalized);

        return filter_var($canonical, FILTER_VALIDATE_IP, FILTER_FLAG_IPV4) !== false;
    }
474
475
    /**
     * Splits a dotted IPv4 address into its four octets.
     *
     * Each octet must be a non-empty run of decimal digits whose value,
     * ignoring leading zeros, is between 0 and 255. Signed, exponent, and
     * whitespace-padded forms that is_numeric() would accept (for example
     * "+1", "1e2", " 7") are rejected. The octets are returned exactly as
     * written, including any leading zeros.
     *
     * @param string $ipv4 The IPv4 address to parse
     * @return array|null List of four octet strings, or null if the address is malformed
     */
    private function parseIPv4Octets(string $ipv4): ?array
    {
        $octets = explode('.', $ipv4);
        if (count($octets) !== 4) {
            return null;
        }

        foreach ($octets as $octet) {
            // Empty octets (e.g. "1..2.3") are invalid
            if ($octet === '') {
                return null;
            }

            // Leading zeros do not affect the value; an all-zero octet is 0
            $digits = ltrim($octet, '0');
            if ($digits === '') {
                $digits = '0';
            }

            // Digits only, at most three of them, and no larger than 255
            if (!ctype_digit($digits) || strlen($digits) > 3 || (int)$digits > 255) {
                return null;
            }
        }

        return $octets;
    }
510
}
511