1
|
|
|
<?php |
2
|
|
|
declare(strict_types=1); |
3
|
|
|
|
4
|
|
|
namespace Spires\Irc; |
5
|
|
|
|
6
|
|
|
class Parser
{
    /**
     * Parse a raw IRC message following the message format defined in
     * RFC 2812 for the Internet Relay Chat: Client Protocol
     * http://tools.ietf.org/html/rfc2812#section-2.3.1
     *
     * The grammar is assembled bottom-up from small regular-expression
     * fragments that mirror the ABNF rules of the RFC, then applied to the
     * whole message in a single match.
     *
     * @param string $raw Raw message line, including the terminating CRLF.
     * @return array{
     *     nickname: string,
     *     username: string,
     *     hostname: string,
     *     serverName: string,
     *     command: string,
     *     params: string
     * } Whitespace-trimmed message parts; every value is an empty string
     *   when the message does not match the grammar or the part is absent.
     */
    public function parse(string $raw): array
    {
        // NOTE: the fragments below are spliced into character classes
        // ("[...]"), where "|" is a literal pipe rather than alternation,
        // so ranges are simply written next to each other.

        // = %x0D %x0A ; "carriage return" "linefeed"
        $crlf = '\r\n';

        // = %x41-5A / %x61-7A ; A-Z / a-z
        $letter = 'A-Za-z';

        // = %x30-39 ; 0-9
        $digit = '0-9';

        // = digit / "A" / "B" / "C" / "D" / "E" / "F"
        $hexdigit = "{$digit}a-fA-F";

        // = %x5B-60 / %x7B-7D ; "[", "]", "\", "`", "_", "^", "{", "|", "}"
        $special = '\x5B-\x60\x7B-\x7D';

        // = %x01-09 / %x0B-0C / %x0E-1F / %x21-39 / %x3B-FF
        //   ; any octet except NUL, CR, LF, " " and ":"
        $nospcrlfcl = '\x01-\x09\x0B-\x0C\x0E-\x1F\x21-\x39\x3B-\xFF';

        // = %x20 ; space character
        $space = '\x20';

        // Variables to make the following regular expressions easier to follow
        $colon = ':';
        $bang = '!';
        $at = '@';
        $dash = '-';
        $slash = '\/';
        $dot = '\.';

        // = 1*letter / 3digit
        $command = "(?:[$letter]+|[$digit]{3})";

        // = nospcrlfcl *( ":" / nospcrlfcl )
        $middle = "(?:[$nospcrlfcl][$colon$nospcrlfcl]*)";

        // = *( ":" / " " / nospcrlfcl )
        $trailing = "(?:[$colon$space$nospcrlfcl]*)";

        // =  *14( SPACE middle ) [ SPACE ":" trailing ]
        // =/ 14( SPACE middle ) [ SPACE [ ":" ] trailing ]
        $params = "(?:(?:$space$middle){0,14}(?:$space$colon$trailing)?" .
            "|(?:$space$middle){14}(?:$space(?:$colon)?$trailing)?)";

        // = ( letter / digit ) *( letter / digit / "-" ) *( letter / digit )
        // A "/" is additionally accepted inside a label (beyond the RFC rule).
        $shortname = "(?:[$letter$digit][$letter$digit$dash$slash]*[$letter$digit]*)";

        // = shortname *( "." shortname )
        $hostname = "(?:$shortname(?:$dot$shortname)*)";

        // = hostname
        $servername = $hostname;

        // = 1*3digit "." 1*3digit "." 1*3digit "." 1*3digit
        $ip4addr = "(?:(?:[$digit]{1,3})$dot(?:[$digit]{1,3})$dot(?:[$digit]{1,3})$dot(?:[$digit]{1,3}))";

        // =  1*hexdigit 7( ":" 1*hexdigit )
        // =/ "0:0:0:0:0:" ( "0" / "FFFF" ) ":" ip4addr
        $ip6addr = "(?:(?:[$hexdigit]+?(?:$colon(?:[$hexdigit]+?)){7})|(?:0:0:0:0:0:(?:0|FFFF)$colon$ip4addr))";

        // = ip4addr / ip6addr
        $hostaddr = "(?:$ip4addr|$ip6addr)";

        // = hostname / hostaddr
        $host = "(?:$hostname|$hostaddr)";

        // = ( letter / special ) *8( letter / digit / special / "-" )
        //   * While the maximum length is limited to nine characters, clients
        //   * SHOULD accept longer strings as they may become used in future
        //   * evolutions of the protocol.
        //   * https://tools.ietf.org/html/rfc2812#section-1.2.1
        $nickname = "(?:[$letter$special][$letter$digit$special$dash]*)";

        // = 1*( %x01-09 / %x0B-0C / %x0E-1F / %x21-3F / %x41-FF )
        //   ; any octet except NUL, CR, LF, " " and "@"
        $user = '(?:[\x01-\x09\x0B-\x0C\x0E-\x1F\x21-\x3F\x41-\xFF]+)';

        // = servername / ( nickname [ [ "!" user ] "@" host ] )
        $prefix = "(?:(?P<servername>$servername)" .
            "|(?:(?P<nickname>$nickname)(?:$bang(?P<username>$user))?(?:$at(?P<hostname>$host))?))";

        // = [ ":" prefix SPACE ] command [ params ] crlf
        $message = "(?P<prefix>$colon$prefix$space)?(?P<command>$command)(?P<params>$params)?$crlf";

        // Match the whole line. "U" inverts quantifier greediness; on a
        // failed match $matches stays empty and every part defaults to ''.
        $matches = [];
        preg_match("/^$message\$/SU", $raw, $matches);

        // Trim whitespace (e.g. the leading space captured with the params)
        $matches = array_map('trim', $matches);

        // Return only the named matches we want in the order we want.
        // The capture group is named "servername" (all lower case); group
        // names are case-sensitive, so it must be read under that exact key.
        return [
            'nickname' => $matches['nickname'] ?? '',
            'username' => $matches['username'] ?? '',
            'hostname' => $matches['hostname'] ?? '',
            'serverName' => $matches['servername'] ?? '',
            'command' => $matches['command'] ?? '',
            'params' => $matches['params'] ?? '',
        ];
    }
}
117
|
|
|
|
Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.
The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.
This check looks for comments that seem to be mostly valid code and reports them.