Passed
Pull Request — 3.x (#306)
by
unknown
01:58
created

EmailLexer   A

Complexity

Total Complexity 27

Size/Duplication

Total Lines 327
Duplicated Lines 0 %

Test Coverage

Coverage 96.88%

Importance

Changes 0
Metric Value
eloc 158
dl 0
loc 327
ccs 62
cts 64
cp 0.9688
rs 10
c 0
b 0
f 0
wmc 27

18 Methods

Rating   Name   Duplication   Size   Complexity  
A reset() 0 5 1
A getPrevious() 0 3 1
A __construct() 0 4 1
A isInvalidChar() 0 3 1
A getAccumulatedValues() 0 3 1
A moveNext() 0 15 5
A stopRecording() 0 3 1
A isNullType() 0 3 1
A startRecording() 0 3 1
A getModifiers() 0 3 1
A isUTF8Invalid() 0 3 1
A getCatchablePatterns() 0 3 1
A hasInvalidTokens() 0 3 1
A getNonCatchablePatterns() 0 3 1
A getType() 0 23 5
A isValid() 0 3 1
A clearRecorded() 0 3 1
A find() 0 9 2
1
<?php
2
3
namespace Egulias\EmailValidator;
4
5
use Doctrine\Common\Lexer\AbstractLexer;
6
7
/**
 * Lexer that splits an e-mail address string into typed tokens.
 *
 * Token types are the integer constants declared on this class. The class also
 * keeps the previously consumed token, flags invalid tokens seen while lexing
 * and can optionally accumulate ("record") the values of consumed tokens.
 */
class EmailLexer extends AbstractLexer
{
    //ASCII values
    const S_EMPTY            = null;
    const C_NUL              = 0;
    const S_HTAB             = 9;
    const S_LF               = 10;
    const S_CR               = 13;
    const S_SP               = 32;
    const EXCLAMATION        = 33;
    const S_DQUOTE           = 34;
    const NUMBER_SIGN        = 35;
    const DOLLAR             = 36;
    const PERCENTAGE         = 37;
    const AMPERSAND          = 38;
    const S_SQUOTE           = 39;
    const S_OPENPARENTHESIS  = 40;
    const S_CLOSEPARENTHESIS = 41;
    const ASTERISK           = 42;
    const S_PLUS             = 43;
    const S_COMMA            = 44;
    const S_HYPHEN           = 45;
    const S_DOT              = 46;
    const S_SLASH            = 47;
    const S_COLON            = 58;
    const S_SEMICOLON        = 59;
    const S_LOWERTHAN        = 60;
    const S_EQUAL            = 61;
    const S_GREATERTHAN      = 62;
    const QUESTIONMARK       = 63;
    const S_AT               = 64;
    const S_OPENBRACKET      = 91;
    const S_BACKSLASH        = 92;
    const S_CLOSEBRACKET     = 93;
    const CARET              = 94;
    const S_UNDERSCORE       = 95;
    const S_BACKTICK         = 96;
    const S_OPENCURLYBRACES  = 123;
    const S_PIPE             = 124;
    const S_CLOSECURLYBRACES = 125;
    const S_TILDE            = 126;
    const C_DEL              = 127;
    const INVERT_QUESTIONMARK= 168;
    const INVERT_EXCLAMATION = 173;

    // Synthetic token types (not single ASCII code points)
    const GENERIC            = 300;
    const S_IPV6TAG          = 301;
    const INVALID            = 302;
    const CRLF               = 1310;
    const S_DOUBLECOLON      = 5858;
    const ASCII_INVALID_FROM = 127;
    const ASCII_INVALID_TO   = 199;

    /**
     * Maps a token's string value to its token type constant.
     *
     * getType() looks tokens up here first; anything not listed falls through
     * to the NUL / invalid / generic checks.
     *
     * @see http://tools.ietf.org/html/rfc5322#section-3.2.3
     *
     * @var array
     */
    protected $charValue = [
        '{'    => self::S_OPENCURLYBRACES,
        '}'    => self::S_CLOSECURLYBRACES,
        '('    => self::S_OPENPARENTHESIS,
        ')'    => self::S_CLOSEPARENTHESIS,
        '<'    => self::S_LOWERTHAN,
        '>'    => self::S_GREATERTHAN,
        '['    => self::S_OPENBRACKET,
        ']'    => self::S_CLOSEBRACKET,
        ':'    => self::S_COLON,
        ';'    => self::S_SEMICOLON,
        '@'    => self::S_AT,
        '\\'   => self::S_BACKSLASH,
        '/'    => self::S_SLASH,
        ','    => self::S_COMMA,
        '.'    => self::S_DOT,
        "'"    => self::S_SQUOTE,
        "`"    => self::S_BACKTICK,
        '"'    => self::S_DQUOTE,
        '-'    => self::S_HYPHEN,
        '::'   => self::S_DOUBLECOLON,
        ' '    => self::S_SP,
        "\t"   => self::S_HTAB,
        "\r"   => self::S_CR,
        "\n"   => self::S_LF,
        "\r\n" => self::CRLF,
        'IPv6' => self::S_IPV6TAG,
        // The value is null, so isset() in isValid() never reports this entry.
        ''     => self::S_EMPTY,
        // Double quotes are required: '\0' would be a backslash followed by "0",
        // not the NUL byte.
        "\0"   => self::C_NUL,
        '*'    => self::ASTERISK,
        '!'    => self::EXCLAMATION,
        '&'    => self::AMPERSAND,
        '^'    => self::CARET,
        '$'    => self::DOLLAR,
        '%'    => self::PERCENTAGE,
        '~'    => self::S_TILDE,
        '|'    => self::S_PIPE,
        '_'    => self::S_UNDERSCORE,
        '='    => self::S_EQUAL,
        '+'    => self::S_PLUS,
        '¿'    => self::INVERT_QUESTIONMARK,
        '?'    => self::QUESTIONMARK,
        '#'    => self::NUMBER_SIGN,
        '¡'    => self::INVERT_EXCLAMATION,
    ];

    // Matches a run of characters that are neither symbols (\p{S}) nor in the
    // "Other" category (\p{C}, which includes control characters \p{Cc}).
    // isInvalidChar() treats a value as invalid when this does NOT match.
    const INVALID_CHARS_REGEX = "/[^\p{S}\p{C}\p{Cc}]+/iu";

    // Matches one or more Unicode control characters; used by isUTF8Invalid().
    const VALID_UTF8_REGEX = '/\p{Cc}+/u';

    const CATCHABLE_PATTERNS = [
        '[a-zA-Z]+[46]?', //ASCII and domain literal
        '[^\x00-\x7F]',  //UTF-8
        '[0-9]+',
        '\r\n',
        '::',
        '\s+?',
        '.',
    ];

    const NON_CATCHABLE_PATTERNS = [
        '[\xA0-\xff]+',
    ];

    const MODIFIERS = 'iu';

    /** @var bool Set to true once getType() classifies a token as INVALID. */
    protected $hasInvalidTokens = false;

    /**
     * The token that was current before the last moveNext() call.
     *
     * @var array
     *
     * @psalm-var array{value:string, type:null|int, position:int}|array<empty, empty>
     */
    protected $previous = [];

    /**
     * The last matched/seen token.
     *
     * @var array
     *
     * @psalm-suppress NonInvariantDocblockPropertyType
     * @psalm-var array{value:string, type:null|int, position:int}
     */
    public $token;

    /**
     * The next token in the input.
     *
     * @var array|null
     */
    public $lookahead;

    /**
     * Placeholder used for $token / $previous when there is no real token.
     *
     * @psalm-var array{value:'', type:null, position:0}
     */
    private static $nullToken = [
        'value' => '',
        'type' => null,
        'position' => 0,
    ];

    /** @var string Concatenated values of the tokens consumed while recording. */
    private $accumulator = '';

    /** @var bool Whether moveNext() should append token values to the accumulator. */
    private $hasToRecord = false;

    /**
     * Starts with the null token as both current and previous token and no lookahead.
     */
    public function __construct()
    {
        $this->previous = $this->token = self::$nullToken;
        $this->lookahead = null;
    }

    /**
     * Resets the lexer: clears the invalid-token flag, resets the parent lexer
     * and restores the null token as current and previous token.
     */
    public function reset() : void
    {
        $this->hasInvalidTokens = false;
        parent::reset();
        $this->previous = $this->token = self::$nullToken;
    }

    /**
     * Checks that a token of the given type exists further along the input.
     *
     * The search runs on a clone of this lexer.
     *
     * @param int $type
     * @throws \UnexpectedValueException when the clone has no lookahead left after skipping
     * @return bool always true when no exception is thrown
     *
     * @psalm-suppress InvalidScalarArgument
     */
    public function find($type) : bool
    {
        $search = clone $this;
        $search->skipUntil($type);

        if (!$search->lookahead) {
            throw new \UnexpectedValueException($type . ' not found');
        }
        return true;
    }

    /**
     * Advances to the next token, remembering the current one as "previous".
     *
     * While recording is enabled, the value of every token that becomes current
     * is appended to the accumulator.
     *
     * @return bool the result of the parent moveNext()
     */
    public function moveNext() : bool
    {
        // If there is no previous token yet, the current token has not been
        // recorded by an earlier call, so capture it before moving on.
        if ($this->hasToRecord && $this->previous === self::$nullToken) {
            $this->accumulator .= $this->token['value'];
        }

        $this->previous = $this->token;
        $hasNext = parent::moveNext();
        // Fall back to the null token so $this->token is always an array.
        $this->token = $this->token ?: self::$nullToken;

        if ($this->hasToRecord) {
            $this->accumulator .= $this->token['value'];
        }

        return $hasNext;
    }

    /**
     * Retrieve token type. Also processes the token value if necessary.
     *
     * Lookup order: known entry in $charValue, NUL byte, invalid character,
     * otherwise GENERIC. Classifying a token as INVALID also sets the
     * hasInvalidTokens flag.
     *
     * @param string $value token text (taken by reference; not modified here)
     * @throws \InvalidArgumentException
     * @return int|null
     */
    protected function getType(&$value)
    {
        $encoded = $value;

        if (mb_detect_encoding($value, 'auto', true) !== 'UTF-8') {
            // Same ISO-8859-1 -> UTF-8 conversion utf8_encode() performed;
            // utf8_encode() is deprecated as of PHP 8.2.
            $encoded = mb_convert_encoding($value, 'UTF-8', 'ISO-8859-1');
        }

        if ($this->isValid($encoded)) {
            return $this->charValue[$encoded];
        }

        if ($this->isNullType($encoded)) {
            return self::C_NUL;
        }

        if ($this->isInvalidChar($encoded)) {
            $this->hasInvalidTokens = true;
            return self::INVALID;
        }

        return self::GENERIC;
    }

    /**
     * Tells whether the value has a (non-null) entry in $charValue.
     */
    protected function isValid(string $value) : bool
    {
        return isset($this->charValue[$value]);
    }

    /**
     * Tells whether the value is exactly the NUL byte.
     */
    protected function isNullType(string $value) : bool
    {
        return $value === "\0";
    }

    /**
     * Tells whether the value fails to match INVALID_CHARS_REGEX.
     *
     * preg_match() returns 0 on no match and false on error (for example
     * malformed UTF-8 with the "u" modifier); both count as invalid here.
     */
    protected function isInvalidChar(string $value) : bool
    {
        return !preg_match(self::INVALID_CHARS_REGEX, $value);
    }

    /**
     * Tells whether the value contains at least one Unicode control character.
     *
     * preg_match() returns 1 on a match, 0 on no match and false on error, so
     * the result is compared against 1; comparing against false would report
     * true for every value that could be scanned at all.
     */
    protected function isUTF8Invalid(string $value) : bool
    {
        return preg_match(self::VALID_UTF8_REGEX, $value) === 1;
    }

    /**
     * Tells whether any token has been classified as INVALID since the last reset().
     */
    public function hasInvalidTokens() : bool
    {
        return $this->hasInvalidTokens;
    }

    /**
     * Returns the token that was current before the last moveNext() call.
     *
     * @return array
     */
    public function getPrevious() : array
    {
        return $this->previous;
    }

    /**
     * Lexical catchable patterns.
     *
     * @return string[]
     */
    protected function getCatchablePatterns() : array
    {
        return self::CATCHABLE_PATTERNS;
    }

    /**
     * Lexical non-catchable patterns.
     *
     * @return string[]
     */
    protected function getNonCatchablePatterns() : array
    {
        return self::NON_CATCHABLE_PATTERNS;
    }

    /**
     * Regex modifiers returned to the parent lexer.
     */
    protected function getModifiers() : string
    {
        return self::MODIFIERS;
    }

    /**
     * Returns the token values accumulated while recording was enabled.
     */
    public function getAccumulatedValues() : string
    {
        return $this->accumulator;
    }

    /**
     * Enables accumulation of token values in moveNext().
     */
    public function startRecording() : void
    {
        $this->hasToRecord = true;
    }

    /**
     * Disables accumulation of token values; the accumulator is left untouched.
     */
    public function stopRecording() : void
    {
        $this->hasToRecord = false;
    }

    /**
     * Empties the accumulator without changing the recording state.
     */
    public function clearRecorded() : void
    {
        $this->accumulator = '';
    }
}
336