|
1
|
|
|
<?php |
|
2
|
|
|
/** |
|
3
|
|
|
* This file is part of the ZBateson\MailMimeParser project. |
|
4
|
|
|
* |
|
5
|
|
|
* @license http://opensource.org/licenses/bsd-license.php BSD |
|
6
|
|
|
*/ |
|
7
|
|
|
namespace ZBateson\MailMimeParser\Header\Consumer\Received; |
|
8
|
|
|
|
|
9
|
|
|
use ZBateson\MailMimeParser\Header\Part\CommentPart; |
|
10
|
|
|
|
|
11
|
|
|
/**
 * Parses a so-called "extended-domain" (from and by) part of a Received header.
 *
 * Looks for and extracts the following fields from an extended-domain part:
 * Name, Hostname and Address.
 *
 * The Name part is always the portion of the extended-domain part existing on
 * its own, outside of the parenthesized hostname and address part.  This is
 * true regardless of whether an address is used as the name, as it's assumed
 * to be the string used to identify the server, whatever it may be.
 *
 * The parenthesized part normally (but not necessarily) following a name must
 * "look like" a tcp-info section of an extended domain as defined by RFC5321.
 * The validation is very purposefully very loose to be accommodating to many
 * erroneous implementations.  Strictly speaking, a domain part, if it exists,
 * must start with an alphanumeric character.  There must be at least one '.' in
 * the domain part, followed by any number of more alphanumeric, '.', and '-'
 * characters.  The address part must be within square brackets, '[]'...
 * although an address outside of square brackets could be matched by the domain
 * matcher if it exists alone within the parentheses.  The address, strictly
 * speaking, is any number of '.', numbers, ':' and letters a-f.  This allows it
 * to match ipv6 addresses as well.  In addition, the address may start with the
 * string "ipv6" (optionally followed by its ':' separator, which is not
 * considered part of the address), and may be followed by a port number as some
 * implementations seem to do.
 *
 * Strings in parentheses not matching the aforementioned 'domain/address'
 * pattern will be considered comments, and will be returned as a separate
 * CommentPart.
 *
 * @see https://tools.ietf.org/html/rfc5321#section-4.4
 * @see https://github.com/Te-k/pyreceived/blob/master/test.py
 * @author Zaahid Bateson
 */
class DomainConsumer extends GenericReceivedConsumer
{
    /**
     * Overridden to return true if the passed token is a closing parenthesis.
     *
     * Any other token is delegated to the parent implementation.
     *
     * @param string $token
     * @return bool
     */
    protected function isEndToken($token)
    {
        if ($token === ')') {
            return true;
        }
        return parent::isEndToken($token);
    }

    /**
     * Attempts to match a parenthesized expression to find a hostname and an
     * address.
     *
     * Returns true if $value matches the loose 'domain/address' pattern, in
     * which case $hostname and/or $address are assigned when the respective
     * portion is non-empty.  Both portions are optional in the pattern, so an
     * empty (or whitespace-only) $value also matches and returns true while
     * leaving both references untouched.
     *
     * @param string $value the text found between the parentheses
     * @param string $hostname set to the matched hostname, if any
     * @param string $address set to the matched address, if any
     * @return bool
     */
    private function matchHostPart($value, &$hostname, &$address)
    {
        $matches = [];
        // An address literal may carry an "IPv6"/"IPv4" tag.  RFC 5321 writes
        // the tag with a ':' separator ("[IPv6:2001:db8::1]"), which must not
        // end up in the captured address.  The separator is only consumed when
        // it is followed by a non-colon character or by a full "::", so that a
        // tag written without a separator ("[IPv6::1]") still yields "::1".
        $pattern = '~^(?P<name>[a-z0-9\-]+\.[a-z0-9\-\.]+)?\s*'
            . '(\[(IPv[64](:(?=[^:]|::))?)?(?P<addr>[a-f\d\.\:]+)\])?$~i';
        if (preg_match($pattern, $value, $matches)) {
            if (!empty($matches['name'])) {
                $hostname = $matches['name'];
            }
            if (!empty($matches['addr'])) {
                $address = $matches['addr'];
            }
            return true;
        }
        return false;
    }

    /**
     * Creates a single ReceivedDomainPart out of matched parts.  If an
     * unmatched parenthesized expression was found, it's returned as a
     * CommentPart.
     *
     * The values of all non-comment parts are concatenated to form the EHLO
     * name.  If more than one CommentPart is present only the last one is
     * considered.  When that comment matches the hostname/address pattern it
     * is appended (in parentheses) to the string value of the domain part and
     * is not returned separately.
     *
     * @param \ZBateson\MailMimeParser\Header\Part\HeaderPart[] $parts
     * @return \ZBateson\MailMimeParser\Header\Part\ReceivedDomainPart[]|
     *         \ZBateson\MailMimeParser\Header\Part\CommentPart[]|array
     */
    protected function processParts(array $parts)
    {
        $ehloName = null;
        $hostname = null;
        $address = null;
        $commentPart = null;

        $filtered = $this->filterIgnoredSpaces($parts);
        foreach ($filtered as $part) {
            if ($part instanceof CommentPart) {
                // a later comment replaces an earlier one
                $commentPart = $part;
                continue;
            }
            $ehloName .= $part->getValue();
        }

        $strValue = $ehloName;
        if ($commentPart !== null && $this->matchHostPart($commentPart->getComment(), $hostname, $address)) {
            // the comment is really the tcp-info section: fold it into the
            // domain part's value instead of returning it as a comment
            $strValue .= ' (' . $commentPart->getComment() . ')';
            $commentPart = null;
        }

        $domainPart = $this->partFactory->newReceivedDomainPart(
            $this->getPartName(),
            $strValue,
            $ehloName,
            $hostname,
            $address
        );
        // array_filter drops $commentPart when it is null
        return array_filter([ $domainPart, $commentPart ]);
    }
}
|
126
|
|
|
|