1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace Egulias\EmailValidator; |
4
|
|
|
|
5
|
|
|
use Doctrine\Common\Lexer\AbstractLexer; |
6
|
|
|
|
7
|
|
|
/**
 * Lexer built on Doctrine's AbstractLexer that classifies each token of the
 * input into one of the token-type constants declared below.
 *
 * Besides the parent's state it keeps track of the previously consumed token
 * and of whether any token was classified as INVALID.
 */
class EmailLexer extends AbstractLexer
{
    // Token type identifiers. The first group holds real ASCII code points;
    // the identifiers from 261 upwards are plain unique ids, not ASCII codes.
    const C_DEL = 127;
    const C_NUL = 0;
    const S_AT = 64;
    const S_BACKSLASH = 92;
    const S_DOT = 46;
    const S_DQUOTE = 34;
    // NOTE(review): ASCII "(" is 40, while 49 is the digit "1". The value is
    // kept as-is because it is a public constant; confirm before changing it.
    const S_OPENPARENTHESIS = 49;
    const S_CLOSEPARENTHESIS = 261;
    const S_OPENBRACKET = 262;
    const S_CLOSEBRACKET = 263;
    const S_HYPHEN = 264;
    const S_COLON = 265;
    const S_DOUBLECOLON = 266;
    const S_SP = 267;
    const S_HTAB = 268;
    const S_CR = 269;
    const S_LF = 270;
    const S_IPV6TAG = 271;
    const S_LOWERTHAN = 272;
    const S_GREATERTHAN = 273;
    const S_COMMA = 274;
    const S_SEMICOLON = 275;
    const S_OPENQBRACKET = 276;
    const S_CLOSEQBRACKET = 277;
    const S_SLASH = 278;
    const S_EMPTY = null;
    const GENERIC = 300;
    const CRLF = 301;
    const INVALID = 302;
    // Not referenced anywhere inside this class.
    const ASCII_INVALID_FROM = 127;
    const ASCII_INVALID_TO = 199;

    /**
     * US-ASCII visible characters not valid for atext (@link http://tools.ietf.org/html/rfc5322#section-3.2.3)
     *
     * Maps a token value to its token type constant.
     *
     * @var array
     */
    protected $charValue = [
        '(' => self::S_OPENPARENTHESIS,
        ')' => self::S_CLOSEPARENTHESIS,
        '<' => self::S_LOWERTHAN,
        '>' => self::S_GREATERTHAN,
        '[' => self::S_OPENBRACKET,
        ']' => self::S_CLOSEBRACKET,
        ':' => self::S_COLON,
        ';' => self::S_SEMICOLON,
        '@' => self::S_AT,
        '\\' => self::S_BACKSLASH,
        '/' => self::S_SLASH,
        ',' => self::S_COMMA,
        '.' => self::S_DOT,
        '"' => self::S_DQUOTE,
        '-' => self::S_HYPHEN,
        '::' => self::S_DOUBLECOLON,
        ' ' => self::S_SP,
        "\t" => self::S_HTAB,
        "\r" => self::S_CR,
        "\n" => self::S_LF,
        "\r\n" => self::CRLF,
        'IPv6' => self::S_IPV6TAG,
        '{' => self::S_OPENQBRACKET,
        '}' => self::S_CLOSEQBRACKET,
        '' => self::S_EMPTY,
        // Double quotes are required here: '\0' in single quotes is the
        // two-character string backslash + zero, not the NUL byte.
        "\0" => self::C_NUL,
    ];

    /**
     * Set to true as soon as getType() classifies a token as INVALID.
     *
     * @var bool
     */
    protected $hasInvalidTokens = false;

    /**
     * The token that was current before the last call to moveNext().
     *
     * @var array
     */
    protected $previous;

    /**
     * Placeholder used instead of null for the current and previous token.
     *
     * @var array
     */
    private static $nullToken = [
        'value' => '',
        'type' => null,
        'position' => 0,
    ];

    /**
     * Starts with both the current and the previous token set to the
     * placeholder token.
     */
    public function __construct()
    {
        $this->token = self::$nullToken;
        $this->previous = self::$nullToken;
    }

    /**
     * Clears the invalid-token flag, resets the parent lexer and restores the
     * placeholder for the current and previous token.
     */
    public function reset()
    {
        $this->hasInvalidTokens = false;
        parent::reset();
        // Assigned after parent::reset() so the placeholder is what remains.
        $this->token = self::$nullToken;
        $this->previous = self::$nullToken;
    }

    /**
     * Tells whether any token has been classified as INVALID since the last
     * reset.
     *
     * @return boolean
     */
    public function hasInvalidTokens()
    {
        return $this->hasInvalidTokens;
    }

    /**
     * Checks that a token of the given type can be reached from the current
     * position. The search runs on a clone, so this lexer is not advanced.
     *
     * @param $type
     * @throws \UnexpectedValueException when no lookahead token is left after skipping
     * @return boolean always true when no exception is thrown
     */
    public function find($type)
    {
        $probe = clone $this;
        $probe->skipUntil($type);

        // Explicit null comparison instead of an implicit array-to-bool cast.
        if (null === $probe->lookahead) {
            throw new \UnexpectedValueException($type . ' not found');
        }

        return true;
    }

    /**
     * getPrevious
     *
     * @return array token that was current before the last moveNext()
     */
    public function getPrevious()
    {
        return $this->previous;
    }

    /**
     * moveNext
     *
     * Remembers the current token as the previous one, advances the parent
     * lexer and substitutes the placeholder token when the parent leaves the
     * current token unset.
     *
     * @return boolean the value returned by the parent's moveNext()
     */
    public function moveNext()
    {
        $this->previous = $this->token;
        $advanced = parent::moveNext();

        // Explicit null comparison instead of relying on array truthiness.
        if (null === $this->token) {
            $this->token = self::$nullToken;
        }

        return $advanced;
    }

    /**
     * Lexical catchable patterns.
     *
     * @return string[]
     */
    protected function getCatchablePatterns()
    {
        return [
            '[a-zA-Z_]+[46]?', //ASCII and domain literal
            '[^\x00-\x7F]',  //UTF-8
            '[0-9]+',
            '\r\n',
            '::',
            '\s+?',
            '.',
        ];
    }

    /**
     * Lexical non-catchable patterns.
     *
     * @return string[]
     */
    protected function getNonCatchablePatterns()
    {
        return ['[\xA0-\xff]+'];
    }

    /**
     * Retrieve token type. Also processes the token value if necessary.
     *
     * Checks run in this order: NUL byte, known entry of $charValue, control
     * character (INVALID, which also raises the invalid-token flag), and
     * finally GENERIC for everything else.
     *
     * @param string $value
     * @throws \InvalidArgumentException
     * @return integer
     */
    protected function getType(&$value)
    {
        if ($this->isNullType($value)) {
            return self::C_NUL;
        }

        if ($this->isValid($value)) {
            return $this->charValue[$value];
        }

        if ($this->isUTF8Invalid($value)) {
            $this->hasInvalidTokens = true;

            return self::INVALID;
        }

        return self::GENERIC;
    }

    /**
     * Tells whether the value has a non-null entry in $charValue.
     *
     * isset() is false for null-valued entries, so '' (mapped to S_EMPTY,
     * which is null) is not reported as valid.
     *
     * @param $value
     * @return bool
     */
    protected function isValid($value)
    {
        return isset($this->charValue[$value]);
    }

    /**
     * Tells whether the value is exactly the NUL byte.
     *
     * @param $value
     * @return bool
     */
    protected function isNullType($value)
    {
        return $value === "\0";
    }

    /**
     * Tells whether the value contains at least one character of the Unicode
     * "control" category (\p{Cc}).
     *
     * preg_match() yields 1 on a match, 0 on no match and false on failure;
     * only a match counts, so a failed match is reported as false.
     *
     * @param $value
     * @return bool
     */
    protected function isUTF8Invalid($value)
    {
        return preg_match('/\p{Cc}+/u', $value) === 1;
    }

    /**
     * Regex modifiers handed to the parent lexer: case-insensitive and UTF-8.
     *
     * @return string
     */
    protected function getModifiers()
    {
        return 'iu';
    }
}
236
|
|
|
|
This check marks implicit conversions of arrays to boolean values in a comparison. While in PHP an empty array is considered to be equal (but not identical) to false, this is not always apparent.
Consider making the comparison explicit by using
empty(...)
or !empty(...)
instead.