1
|
|
|
<?php |
2
|
|
|
/** |
3
|
|
|
* This file is part of the ZBateson\MailMimeParser project. |
4
|
|
|
* |
5
|
|
|
* @license http://opensource.org/licenses/bsd-license.php BSD |
6
|
|
|
*/ |
7
|
|
|
|
8
|
|
|
namespace ZBateson\MailMimeParser\Header\Consumer\Received; |
9
|
|
|
|
10
|
|
|
use ZBateson\MailMimeParser\Header\Part\CommentPart; |
11
|
|
|
|
12
|
|
|
/**
 * Parses a so-called "extended-domain" (from and by) part of a Received header.
 *
 * Looks for and extracts the following fields from an extended-domain part:
 * Name, Hostname and Address.
 *
 * The Name part is always the portion of the extended-domain part existing on
 * its own, outside of the parenthesized hostname and address part.  This is
 * true regardless of whether an address is used as the name, as it's assumed
 * to be the string used to identify the server, whatever it may be.
 *
 * The parenthesized part normally (but not necessarily) following a name must
 * "look like" a tcp-info section of an extended domain as defined by RFC5321.
 * The validation is purposefully very loose to be accommodating to many
 * erroneous implementations.  Matching is case-insensitive.  The host part,
 * optionally prefixed with "helo=", must contain at least two characters: one
 * or more alphanumeric or '-' characters followed by one or more alphanumeric,
 * '-' or '.' characters.  The address part must be within square brackets,
 * '[]', and may appear before and/or after the host part... although an
 * address outside of square brackets could be matched by the host matcher if
 * it exists alone within the parentheses.  The address is any number of '.',
 * digits, ':' and letters a-f, which allows it to match ipv6 addresses as
 * well.  In addition, the address may start with the string "IPv6" or "IPv4",
 * and, because ':' and digits are part of the accepted character set, a
 * trailing port number inside the brackets is accepted too (and is kept as
 * part of the address), as some implementations seem to do.
 *
 * Strings in parentheses not matching the aforementioned 'domain/address'
 * pattern, or matching it without yielding either a hostname or an address
 * (e.g. an empty comment), will be considered comments, and will be returned
 * as a separate CommentPart.
 *
 * @see https://tools.ietf.org/html/rfc5321#section-4.4
 * @see https://github.com/Te-k/pyreceived/blob/master/test.py
 * @author Zaahid Bateson
 * @author Mariusz Krzaczkowski
 */
class DomainConsumerService extends GenericReceivedConsumerService
{
    /**
     * Overridden to return true if the passed token is a closing parenthesis,
     * deferring to the parent implementation for every other token.
     */
    protected function isEndToken(string $token) : bool
    {
        return $token === ')' || parent::isEndToken($token);
    }

    /**
     * Attempts to match a parenthesized expression to find a hostname and an
     * address.
     *
     * Returns true only if the expression matched and at least one of
     * hostname or address was found.  Every group of the pattern is optional,
     * so an empty or whitespace-only string satisfies the regular expression
     * on its own; such a match carries no information and is reported as a
     * non-match so the caller can keep the text as a regular comment.
     *
     * @param string $value the text found between the parentheses
     * @param string|null $hostname set to the matched host name, left
     *        untouched if none was found
     * @param string|null $address set to the matched address, left untouched
     *        if none was found
     */
    private function matchHostPart(string $value, ?string &$hostname, ?string &$address) : bool
    {
        $pattern = '~^(\[(IPv[64])?(?P<addr1>[a-f\d\.\:]+)\])?\s*(helo=)?(?P<name>[a-z0-9\-]+[a-z0-9\-\.]+)?\s*(\[(IPv[64])?(?P<addr2>[a-f\d\.\:]+)\])?$~i';
        $matches = [];
        if (!\preg_match($pattern, $value, $matches)) {
            return false;
        }

        // Trailing groups that did not participate in the match are absent
        // from $matches, hence the null-coalescing defaults.  Comparing
        // against '' rather than using empty() keeps a literal "0" capture.
        $name = $matches['name'] ?? '';
        $leadingAddress = $matches['addr1'] ?? '';
        $trailingAddress = $matches['addr2'] ?? '';

        // When both bracketed addresses are present the trailing one wins.
        $found = ($trailingAddress !== '') ? $trailingAddress : $leadingAddress;

        if ($name === '' && $found === '') {
            return false;
        }
        if ($name !== '') {
            $hostname = $name;
        }
        if ($found !== '') {
            $address = $found;
        }
        return true;
    }

    /**
     * Creates a single ReceivedDomainPart out of matched parts.  If an
     * unmatched parenthesized expression was found, it's returned as a
     * CommentPart following the domain part.
     *
     * The values of all non-comment parts are concatenated to form the EHLO
     * name.  Only the last CommentPart encountered is considered: if it
     * matches the host/address pattern it is folded into the domain part's
     * value as ' (comment)', otherwise it is returned after the domain part.
     *
     * @param \ZBateson\MailMimeParser\Header\Part\HeaderPart[] $parts
     * @return \ZBateson\MailMimeParser\Header\Part\ReceivedDomainPart[]|\ZBateson\MailMimeParser\Header\Part\CommentPart[]|\ZBateson\MailMimeParser\Header\Part\HeaderPart[]
     */
    protected function processParts(array $parts) : array
    {
        $ehloName = null;
        $commentPart = null;
        foreach ($this->filterIgnoredSpaces($parts) as $part) {
            if ($part instanceof CommentPart) {
                // a later comment replaces an earlier one
                $commentPart = $part;
            } else {
                $ehloName .= $part->getValue();
            }
        }

        $hostname = null;
        $address = null;
        $strValue = $ehloName;
        if ($commentPart !== null) {
            $comment = $commentPart->getComment();
            if ($this->matchHostPart($comment, $hostname, $address)) {
                // the comment was a host/address section, not a real comment
                $strValue .= ' (' . $comment . ')';
                $commentPart = null;
            }
        }

        $domainPart = $this->partFactory->newReceivedDomainPart(
            $this->partName,
            $strValue,
            $ehloName,
            $hostname,
            $address
        );
        // array_filter drops the comment slot when it was consumed or absent
        return \array_values(\array_filter([$domainPart, $commentPart]));
    }
}
122
|
|
|
|