|
1
|
|
|
<?php |
|
2
|
|
|
/** |
|
3
|
|
|
* This file is part of the ZBateson\MailMimeParser project. |
|
4
|
|
|
* |
|
5
|
|
|
* @license http://opensource.org/licenses/bsd-license.php BSD |
|
6
|
|
|
*/ |
|
7
|
|
|
|
|
8
|
|
|
namespace ZBateson\MailMimeParser\Header\Consumer\Received; |
|
9
|
|
|
|
|
10
|
|
|
use ZBateson\MailMimeParser\Header\Part\CommentPart; |
|
11
|
|
|
|
|
12
|
|
|
/** |
|
13
|
|
|
* Parses a so-called "extended-domain" (from and by) part of a Received header. |
|
14
|
|
|
* |
|
15
|
|
|
* Looks for and extracts the following fields from an extended-domain part: |
|
16
|
|
|
* Name, Hostname and Address. |
|
17
|
|
|
* |
|
18
|
|
|
* The Name part is always the portion of the extended-domain part existing on |
|
19
|
|
|
* its own, outside of the parenthesized hostname and address part. This is |
|
20
|
|
|
* true regardless of whether an address is used as the name, as it's assumed to |
|
21
|
|
|
* be the string used to identify the server, whatever it may be. |
|
22
|
|
|
* |
|
23
|
|
|
* The parenthesized part normally (but not necessarily) following a name must |
|
24
|
|
|
* "look like" a tcp-info section of an extended domain as defined by RFC5321. |
|
25
|
|
|
* The validation is very purposefully very loose to be accommodating to many |
|
26
|
|
|
* erroneous implementations. The only restriction is the host part must |
|
27
|
|
|
* contain two characters, the first being alphanumeric, followed by any number |
|
28
|
|
|
* of more alphanumeric, '.', and '-' characters. The address part must be |
|
29
|
|
|
* within square brackets, '[]'... although an address outside of square |
|
30
|
|
|
* brackets could be matched by the domain matcher if it exists alone within the |
|
31
|
|
|
* parentheses. The address is any number of '.', numbers, ':' and letters a-f. |
|
32
|
|
|
* This allows it to match ipv6 addresses as well. In addition, the address may |
|
33
|
|
|
* start with the string "ipv6", and may be followed by a port number as some |
|
34
|
|
|
* implementations seem to do. |
|
35
|
|
|
* |
|
36
|
|
|
* Strings in parentheses not matching the aforementioned 'domain/address' |
|
37
|
|
|
* pattern will be considered comments, and will be returned as a separate |
|
38
|
|
|
* CommentPart. |
|
39
|
|
|
* |
|
40
|
|
|
* @see https://tools.ietf.org/html/rfc5321#section-4.4 |
|
41
|
|
|
* @see https://github.com/Te-k/pyreceived/blob/master/test.py |
|
42
|
|
|
* @author Zaahid Bateson |
|
43
|
|
|
* @author Mariusz Krzaczkowski |
|
44
|
|
|
*/ |
|
45
|
|
|
class DomainConsumerService extends GenericReceivedConsumerService
{
    /**
     * Overridden to additionally treat a closing parenthesis as an end token.
     *
     * Any other token is deferred to the parent implementation.
     *
     * @param string $token The token to test.
     * @return bool true if $token is ')' or the parent considers it an end
     *         token.
     */
    protected function isEndToken(string $token) : bool
    {
        return $token === ')' || parent::isEndToken($token);
    }

    /**
     * Attempts to match a parenthesized expression to find a hostname and an
     * address. Returns true only if the expression matched AND at least one
     * of hostname or address was actually found.
     *
     * Every group in the pattern is optional, so an empty, whitespace-only or
     * bare "helo=" value satisfies the regular expression without capturing
     * anything. Such values are reported as "not matched" so the caller keeps
     * them as ordinary comments.
     *
     * If both a leading and a trailing bracketed address are captured, the
     * trailing one is used.
     *
     * @param string $value The text found inside the parentheses.
     * @param string|null $hostname Set to the captured host name, if any;
     *        left untouched otherwise.
     * @param string|null $address Set to the captured address, if any; left
     *        untouched otherwise.
     * @return bool true if a hostname and/or an address was captured.
     */
    private function matchHostPart(string $value, ?string &$hostname, ?string &$address) : bool
    {
        $pattern = '~^(\[(IPv[64])?(?P<addr1>[a-f\d\.\:]+)\])?\s*(helo=)?(?P<name>[a-z0-9\-]+[a-z0-9\-\.]+)?\s*(\[(IPv[64])?(?P<addr2>[a-f\d\.\:]+)\])?$~i';
        $matches = [];
        if (\preg_match($pattern, $value, $matches) !== 1) {
            return false;
        }

        // Unmatched groups are either absent (trailing) or empty strings, so
        // normalise both cases to ''. Explicit comparison is used instead of
        // empty() because empty('0') is true and would discard a capture.
        $name = $matches['name'] ?? '';
        $leadingAddress = $matches['addr1'] ?? '';
        $trailingAddress = $matches['addr2'] ?? '';

        if ($name === '' && $leadingAddress === '' && $trailingAddress === '') {
            // The pattern matched without capturing anything useful.
            return false;
        }

        if ($name !== '') {
            $hostname = $name;
        }
        if ($leadingAddress !== '') {
            $address = $leadingAddress;
        }
        if ($trailingAddress !== '') {
            // A trailing address takes precedence over a leading one.
            $address = $trailingAddress;
        }
        return true;
    }

    /**
     * Creates a single ReceivedDomainPart out of matched parts. If an
     * unmatched parenthesized expression was found, it's returned as a
     * CommentPart following the domain part.
     *
     * The values of all non-comment parts are concatenated to form the EHLO
     * name. If more than one CommentPart is present, only the last one is
     * considered.
     *
     * @param \ZBateson\MailMimeParser\Header\Part\HeaderPart[] $parts
     * @return \ZBateson\MailMimeParser\Header\Part\ReceivedDomainPart[]|\ZBateson\MailMimeParser\Header\Part\CommentPart[]|\ZBateson\MailMimeParser\Header\Part\HeaderPart[]
     */
    protected function processParts(array $parts) : array
    {
        $ehloName = null;
        $hostname = null;
        $address = null;
        $commentPart = null;

        foreach ($this->filterIgnoredSpaces($parts) as $part) {
            if ($part instanceof CommentPart) {
                // Each comment overwrites the previous one.
                $commentPart = $part;
                continue;
            }
            $ehloName .= $part->getValue();
        }

        $strValue = $ehloName;
        if ($commentPart !== null) {
            $comment = $commentPart->getComment();
            if ($this->matchHostPart($comment, $hostname, $address)) {
                // The comment describes the host: fold it into the domain
                // part's value and don't return it as a separate comment.
                $strValue .= ' (' . $comment . ')';
                $commentPart = null;
            }
        }

        $domainPart = $this->partFactory->newReceivedDomainPart(
            $this->partName,
            $strValue,
            $ehloName,
            $hostname,
            $address
        );

        // array_filter drops the comment slot when it's null; array_values
        // re-indexes the result into a list.
        return \array_values(\array_filter([$domainPart, $commentPart]));
    }
}
|
122
|
|
|
|