Passed
Pull Request — 3.x (#342)
by Maximilian
02:16
created

EmailLexer::getToken()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
cc 1
eloc 1
nc 1
nop 0
dl 0
loc 3
ccs 2
cts 2
cp 1
crap 1
rs 10
c 0
b 0
f 0
<?php

namespace Egulias\EmailValidator;

use Doctrine\Common\Lexer\AbstractLexer;

/**
 * Lexer that splits an e-mail address into typed tokens.
 *
 * Built on Doctrine's AbstractLexer. On top of the inherited behaviour it
 * remembers the previously consumed token, flags the presence of invalid
 * tokens, and can optionally record the values of the tokens it walks over.
 */
class EmailLexer extends AbstractLexer
{
    // Token types. Values up to 127 are the ASCII code of the character they name.
    public const S_EMPTY            = null;
    public const C_NUL              = 0;
    public const S_HTAB             = 9;
    public const S_LF               = 10;
    public const S_CR               = 13;
    public const S_SP               = 32;
    public const EXCLAMATION        = 33;
    public const S_DQUOTE           = 34;
    public const NUMBER_SIGN        = 35;
    public const DOLLAR             = 36;
    public const PERCENTAGE         = 37;
    public const AMPERSAND          = 38;
    public const S_SQUOTE           = 39;
    public const S_OPENPARENTHESIS  = 40;
    public const S_CLOSEPARENTHESIS = 41;
    public const ASTERISK           = 42;
    public const S_PLUS             = 43;
    public const S_COMMA            = 44;
    public const S_HYPHEN           = 45;
    public const S_DOT              = 46;
    public const S_SLASH            = 47;
    public const S_COLON            = 58;
    public const S_SEMICOLON        = 59;
    public const S_LOWERTHAN        = 60;
    public const S_EQUAL            = 61;
    public const S_GREATERTHAN      = 62;
    public const QUESTIONMARK       = 63;
    public const S_AT               = 64;
    public const S_OPENBRACKET      = 91;
    public const S_BACKSLASH        = 92;
    public const S_CLOSEBRACKET     = 93;
    public const CARET              = 94;
    public const S_UNDERSCORE       = 95;
    public const S_BACKTICK         = 96;
    public const S_OPENCURLYBRACES  = 123;
    public const S_PIPE             = 124;
    public const S_CLOSECURLYBRACES = 125;
    public const S_TILDE            = 126;
    public const C_DEL              = 127;

    // Identifiers above 127 are not ASCII codes.
    public const INVERT_QUESTIONMARK = 168;
    public const INVERT_EXCLAMATION  = 173;
    public const GENERIC             = 300;
    public const S_IPV6TAG           = 301;
    public const INVALID             = 302;
    public const CRLF                = 1310; // digits of S_CR followed by S_LF
    public const S_DOUBLECOLON       = 5858; // digits of S_COLON twice
    public const ASCII_INVALID_FROM  = 127;
    public const ASCII_INVALID_TO    = 199;

    /**
     * Maps a lexeme to its token type.
     *
     * US-ASCII visible characters not valid for atext (@link http://tools.ietf.org/html/rfc5322#section-3.2.3)
     *
     * @var array<string, int|null>
     */
    protected $charValue = [
        '{'    => self::S_OPENCURLYBRACES,
        '}'    => self::S_CLOSECURLYBRACES,
        '('    => self::S_OPENPARENTHESIS,
        ')'    => self::S_CLOSEPARENTHESIS,
        '<'    => self::S_LOWERTHAN,
        '>'    => self::S_GREATERTHAN,
        '['    => self::S_OPENBRACKET,
        ']'    => self::S_CLOSEBRACKET,
        ':'    => self::S_COLON,
        ';'    => self::S_SEMICOLON,
        '@'    => self::S_AT,
        '\\'   => self::S_BACKSLASH,
        '/'    => self::S_SLASH,
        ','    => self::S_COMMA,
        '.'    => self::S_DOT,
        "'"    => self::S_SQUOTE,
        "`"    => self::S_BACKTICK,
        '"'    => self::S_DQUOTE,
        '-'    => self::S_HYPHEN,
        '::'   => self::S_DOUBLECOLON,
        ' '    => self::S_SP,
        "\t"   => self::S_HTAB,
        "\r"   => self::S_CR,
        "\n"   => self::S_LF,
        "\r\n" => self::CRLF,
        'IPv6' => self::S_IPV6TAG,
        ''     => self::S_EMPTY,
        // Double quotes are required here: '\0' in single quotes is the two
        // characters backslash + zero, not the NUL byte.
        "\0"   => self::C_NUL,
        '*'    => self::ASTERISK,
        '!'    => self::EXCLAMATION,
        '&'    => self::AMPERSAND,
        '^'    => self::CARET,
        '$'    => self::DOLLAR,
        '%'    => self::PERCENTAGE,
        '~'    => self::S_TILDE,
        '|'    => self::S_PIPE,
        '_'    => self::S_UNDERSCORE,
        '='    => self::S_EQUAL,
        '+'    => self::S_PLUS,
        '¿'    => self::INVERT_QUESTIONMARK,
        '?'    => self::QUESTIONMARK,
        '#'    => self::NUMBER_SIGN,
        '¡'    => self::INVERT_EXCLAMATION,
    ];

    /**
     * Matches a run of characters that are neither symbols (\p{S}) nor in the
     * "other" category (\p{C}, which includes control characters).
     * isInvalidChar() reports a value as invalid when this pattern does NOT match.
     */
    public const INVALID_CHARS_REGEX = "/[^\p{S}\p{C}\p{Cc}]+/iu";

    /** Matches a run of control characters; used by isUTF8Invalid(). */
    public const VALID_UTF8_REGEX = '/\p{Cc}+/u';

    /** Patterns returned by getCatchablePatterns(). */
    public const CATCHABLE_PATTERNS = [
        '[a-zA-Z]+[46]?', //ASCII and domain literal
        '[^\x00-\x7F]',  //UTF-8
        '[0-9]+',
        '\r\n',
        '::',
        '\s+?',
        '.',
    ];

    /** Patterns returned by getNonCatchablePatterns(). */
    public const NON_CATCHABLE_PATTERNS = [
        '[\xA0-\xff]+',
    ];

    /** Regex modifiers returned by getModifiers(). */
    public const MODIFIERS = 'iu';

    /**
     * Set to true by getType() as soon as one token is classified as INVALID.
     *
     * @var bool
     */
    protected $hasInvalidTokens = false;

    /**
     * Token that was current before the last moveNext() call.
     *
     * @var array|\Doctrine\Common\Lexer\Token|null
     *
     * @psalm-suppress UndefinedDocblockClass
     * @psalm-var array{value:string, type:null|int, position:int}|array<empty, empty>|\Doctrine\Common\Lexer\Token|null
     */
    protected $previous = [];

    /**
     * Placeholder token (empty value, null type, position 0) shared by all
     * instances; created by the first constructor call.
     *
     * @var array|\Doctrine\Common\Lexer\Token|null
     *
     * @psalm-suppress UndefinedDocblockClass
     * @psalm-var array{value:string, type:null|int, position:int}|array<empty, empty>|\Doctrine\Common\Lexer\Token|null
     */
    private static $nullToken = null;

    /**
     * Concatenated values of the tokens seen while recording is enabled.
     *
     * @var string
     */
    private $accumulator = '';

    /**
     * Whether moveNext() appends token values to the accumulator.
     *
     * @var bool
     */
    private $hasToRecord = false;

    /**
     * Initialises the current and previous token with the shared placeholder
     * token and clears the lookahead.
     *
     * @psalm-suppress InvalidPropertyAssignmentValue
     * @psalm-suppress PropertyTypeCoercion
     */
    public function __construct()
    {
        if (null === self::$nullToken) {
            // Built once; its concrete shape (array or Token object) depends on
            // the installed doctrine/lexer version, see denormalizeToken().
            self::$nullToken = $this->denormalizeToken([
                'value' => '',
                'type' => null,
                'position' => 0,
            ]);
        }

        $this->previous = $this->token = self::$nullToken;
        $this->lookahead = null;
    }

    /**
     * Resets the lexer state, including the invalid-token flag and the
     * current/previous tokens. The recorded accumulator is left untouched.
     *
     * @psalm-suppress InvalidPropertyAssignmentValue
     * @psalm-suppress PropertyTypeCoercion
     */
    public function reset() : void
    {
        $this->hasInvalidTokens = false;
        parent::reset();
        $this->previous = $this->token = self::$nullToken;
    }

    /**
     * Checks that a token of the given type lies ahead, without moving this lexer.
     *
     * The search runs on a clone, so the position of this instance is unchanged.
     *
     * @param int $type
     * @throws \UnexpectedValueException when the clone has no lookahead left after skipping
     * @return bool always true when no exception is thrown
     *
     * @psalm-suppress InvalidScalarArgument
     * @psalm-suppress InvalidArgument
     */
    public function find($type) : bool
    {
        $search = clone $this;
        $search->skipUntil($type);

        if (!$search->lookahead) {
            throw new \UnexpectedValueException($type . ' not found');
        }

        return true;
    }

    /**
     * Advances to the next token, remembering the current one as "previous"
     * and, while recording, appending token values to the accumulator.
     *
     * @psalm-suppress InvalidPropertyAssignmentValue
     * @psalm-suppress PropertyTypeCoercion
     * @return bool result of the parent moveNext()
     */
    public function moveNext() : bool
    {
        // While "previous" is still the placeholder, the current token has not
        // been recorded yet, so capture it before advancing.
        if ($this->hasToRecord && $this->previous === self::$nullToken) {
            $this->accumulator .= $this->getToken()['value'];
        }

        $this->previous = $this->token;

        // NOTE(review): presumably prevents the parent from copying a null
        // lookahead into $this->token - confirm against AbstractLexer.
        if ($this->lookahead === null) {
            $this->lookahead = self::$nullToken;
        }

        $hasNext = parent::moveNext();

        if ($this->hasToRecord) {
            $this->accumulator .= $this->getToken()['value'];
        }

        return $hasNext;
    }

    /**
     * Retrieve token type. Also processes the token value if necessary.
     *
     * Classification order: known lexeme from $charValue, NUL byte, invalid
     * character (also raises the invalid-token flag), otherwise GENERIC.
     *
     * @param string $value
     * @throws \InvalidArgumentException
     * @return int
     */
    protected function getType(&$value)
    {
        $encoded = $value;

        // Anything not detected as UTF-8 is interpreted as Windows-1252.
        if (mb_detect_encoding($value, 'auto', true) !== 'UTF-8') {
            $encoded = mb_convert_encoding($value, 'UTF-8', 'Windows-1252');
        }

        if ($this->isValid($encoded)) {
            return $this->charValue[$encoded];
        }

        if ($this->isNullType($encoded)) {
            return self::C_NUL;
        }

        if ($this->isInvalidChar($encoded)) {
            $this->hasInvalidTokens = true;
            return self::INVALID;
        }

        return self::GENERIC;
    }

    /**
     * Tells whether the value is a known lexeme of $charValue.
     *
     * isset() is false for entries whose value is null, so the
     * '' => S_EMPTY entry never satisfies this check.
     */
    protected function isValid(string $value) : bool
    {
        return isset($this->charValue[$value]);
    }

    /**
     * Tells whether the value is exactly one NUL byte.
     */
    protected function isNullType(string $value) : bool
    {
        return $value === "\0";
    }

    /**
     * Tells whether the value fails to match INVALID_CHARS_REGEX.
     *
     * A preg_match() failure (false) is also reported as invalid.
     */
    protected function isInvalidChar(string $value) : bool
    {
        return !preg_match(self::INVALID_CHARS_REGEX, $value);
    }

    /**
     * Runs VALID_UTF8_REGEX against the value.
     *
     * NOTE(review): preg_match() returns 0 or 1 on success and false only on
     * error, so this returns true for every value the regex engine can process.
     * The condition looks inverted or incomplete; behaviour is kept as-is
     * until the intended semantics are confirmed.
     */
    protected function isUTF8Invalid(string $value) : bool
    {
        return preg_match(self::VALID_UTF8_REGEX, $value) !== false;
    }

    /**
     * Tells whether any token has been classified as INVALID since the last reset.
     */
    public function hasInvalidTokens() : bool
    {
        return $this->hasInvalidTokens;
    }

    /**
     * Returns the previous token in array form.
     *
     * @return array
     */
    public function getPrevious() : array
    {
        return $this->normalizeToken($this->previous);
    }

    /**
     * Returns the current token in array form.
     *
     * @return array
     */
    public function getToken() : array
    {
        return $this->normalizeToken($this->token);
    }

    /**
     * Converts a token into the representation of the installed lexer: a
     * Token object when the Doctrine Token class exists, a plain array otherwise.
     *
     * @psalm-suppress UndefinedDocblockClass
     *
     * @param array|\ArrayAccess|\Doctrine\Common\Lexer\Token $token
     * @throws \LogicException when the token cannot be converted
     * @return array|\Doctrine\Common\Lexer\Token
     */
    private function denormalizeToken($token)
    {
        if (class_exists('Doctrine\Common\Lexer\Token')) {
            if ($token instanceof \Doctrine\Common\Lexer\Token) {
                return $token;
            }

            if (is_array($token) || $token instanceof \ArrayAccess) {
                return new \Doctrine\Common\Lexer\Token($token['value'], $token['type'], $token['position']);
            }
        }

        if (is_array($token)) {
            return $token;
        }

        throw new \LogicException(sprintf('unsupported type of token "%s"', $this->describeTokenType($token)));
    }

    /**
     * Converts a token into its array form (value, type, position).
     *
     * @psalm-suppress UndefinedClass
     * @psalm-suppress UndefinedDocblockClass
     * @psalm-param array{position: int, type: int|null|string, value: int|string}|array{position?: int, type?: int|null, value?: string}|\Doctrine\Common\Lexer\Token|null $token
     * @throws \LogicException when the token is neither an array nor a Token object
     */
    private function normalizeToken($token): array
    {
        if (is_array($token)) {
            return $token;
        }

        if (class_exists('Doctrine\Common\Lexer\Token') && $token instanceof \Doctrine\Common\Lexer\Token) {
            return [
                'value' => $token->value,
                'type' => $token->type,
                'position' => $token->position,
            ];
        }

        throw new \LogicException(sprintf('unsupported type of token "%s"', $this->describeTokenType($token)));
    }

    /**
     * Returns a printable type name for error messages.
     *
     * get_debug_type() only exists from PHP 8.0 on (or through a polyfill);
     * on older runtimes fall back to get_class()/gettype() so the error path
     * raises the intended exception instead of an undefined-function error.
     *
     * @param mixed $token
     */
    private function describeTokenType($token): string
    {
        if (function_exists('get_debug_type')) {
            return get_debug_type($token);
        }

        return is_object($token) ? get_class($token) : gettype($token);
    }

    /**
     * Lexical catchable patterns.
     *
     * @return string[]
     */
    protected function getCatchablePatterns() : array
    {
        return self::CATCHABLE_PATTERNS;
    }

    /**
     * Lexical non-catchable patterns.
     *
     * @return string[]
     */
    protected function getNonCatchablePatterns() : array
    {
        return self::NON_CATCHABLE_PATTERNS;
    }

    /**
     * Regex modifiers applied to the lexer patterns.
     */
    protected function getModifiers() : string
    {
        return self::MODIFIERS;
    }

    /**
     * Returns everything recorded so far.
     */
    public function getAccumulatedValues() : string
    {
        return $this->accumulator;
    }

    /**
     * Makes moveNext() append token values to the accumulator.
     */
    public function startRecording() : void
    {
        $this->hasToRecord = true;
    }

    /**
     * Stops appending token values; the accumulated text is kept.
     */
    public function stopRecording() : void
    {
        $this->hasToRecord = false;
    }

    /**
     * Empties the accumulator without changing the recording state.
     */
    public function clearRecorded() : void
    {
        $this->accumulator = '';
    }
}
390