Passed
Pull Request — 3.x (#342)
by Maximilian
02:20
created

EmailLexer::isInvalidChar()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
cc 1
eloc 1
nc 1
nop 1
dl 0
loc 3
ccs 2
cts 2
cp 1
crap 1
rs 10
c 0
b 0
f 0
1
<?php
2
3
namespace Egulias\EmailValidator;
4
5
use Doctrine\Common\Lexer\AbstractLexer;
6
7
class EmailLexer extends AbstractLexer
8
{
9
    // Token type identifiers returned by getType().
    // Single-character tokens use the character's ASCII code as their type.

    // Empty value and control characters.
    public const S_EMPTY = null;
    public const C_NUL = 0;
    public const S_HTAB = 9;
    public const S_LF = 10;
    public const S_CR = 13;

    // Space and punctuation, codes 32-47.
    public const S_SP = 32;
    public const EXCLAMATION = 33;
    public const S_DQUOTE = 34;
    public const NUMBER_SIGN = 35;
    public const DOLLAR = 36;
    public const PERCENTAGE = 37;
    public const AMPERSAND = 38;
    public const S_SQUOTE = 39;
    public const S_OPENPARENTHESIS = 40;
    public const S_CLOSEPARENTHESIS = 41;
    public const ASTERISK = 42;
    public const S_PLUS = 43;
    public const S_COMMA = 44;
    public const S_HYPHEN = 45;
    public const S_DOT = 46;
    public const S_SLASH = 47;

    // Punctuation, codes 58-64.
    public const S_COLON = 58;
    public const S_SEMICOLON = 59;
    public const S_LOWERTHAN = 60;
    public const S_EQUAL = 61;
    public const S_GREATERTHAN = 62;
    public const QUESTIONMARK = 63;
    public const S_AT = 64;

    // Punctuation, codes 91-96.
    public const S_OPENBRACKET = 91;
    public const S_BACKSLASH = 92;
    public const S_CLOSEBRACKET = 93;
    public const CARET = 94;
    public const S_UNDERSCORE = 95;
    public const S_BACKTICK = 96;

    // Punctuation and DEL, codes 123-127.
    public const S_OPENCURLYBRACES = 123;
    public const S_PIPE = 124;
    public const S_CLOSECURLYBRACES = 125;
    public const S_TILDE = 126;
    public const C_DEL = 127;

    // Inverted punctuation marks.
    // NOTE(review): 168 and 173 look like the code page 437 positions of
    // "¿" and "¡" rather than their Latin-1 code points (191 / 161) - confirm.
    public const INVERT_QUESTIONMARK = 168;
    public const INVERT_EXCLAMATION = 173;

    // Synthetic types that do not correspond to a single character.
    public const GENERIC = 300;
    public const S_IPV6TAG = 301;
    public const INVALID = 302;

    // Multi-character tokens: the codes of their characters written side by
    // side (13 and 10 for "\r\n", 58 and 58 for "::").
    public const CRLF = 1310;
    public const S_DOUBLECOLON = 5858;

    // Bounds of a code range; not referenced by the code visible in this file.
    public const ASCII_INVALID_FROM = 127;
    public const ASCII_INVALID_TO = 199;
58
59
    /**
     * Lookup table from a token's literal text to its token type constant.
     *
     * Mostly US-ASCII visible characters not valid for atext
     * (@link http://tools.ietf.org/html/rfc5322#section-3.2.3), plus whitespace,
     * a few multi-character tokens ("::", "\r\n", "IPv6") and two inverted
     * punctuation marks.
     *
     * isValid() probes this table with isset(), so the '' => null entry is
     * never reported as a known token.
     *
     * @var array<string, int|null>
     */
    protected $charValue = [
        '{'    => self::S_OPENCURLYBRACES,
        '}'    => self::S_CLOSECURLYBRACES,
        '('    => self::S_OPENPARENTHESIS,
        ')'    => self::S_CLOSEPARENTHESIS,
        '<'    => self::S_LOWERTHAN,
        '>'    => self::S_GREATERTHAN,
        '['    => self::S_OPENBRACKET,
        ']'    => self::S_CLOSEBRACKET,
        ':'    => self::S_COLON,
        ';'    => self::S_SEMICOLON,
        '@'    => self::S_AT,
        '\\'   => self::S_BACKSLASH,
        '/'    => self::S_SLASH,
        ','    => self::S_COMMA,
        '.'    => self::S_DOT,
        "'"    => self::S_SQUOTE,
        "`"    => self::S_BACKTICK,
        '"'    => self::S_DQUOTE,
        '-'    => self::S_HYPHEN,
        '::'   => self::S_DOUBLECOLON,
        ' '    => self::S_SP,
        "\t"   => self::S_HTAB,
        "\r"   => self::S_CR,
        "\n"   => self::S_LF,
        "\r\n" => self::CRLF,
        'IPv6' => self::S_IPV6TAG,
        ''     => self::S_EMPTY,
        // Double quotes are required here: in a single-quoted PHP string '\0'
        // is the two characters backslash and zero, not the NUL byte.
        "\0"   => self::C_NUL,
        '*'    => self::ASTERISK,
        '!'    => self::EXCLAMATION,
        '&'    => self::AMPERSAND,
        '^'    => self::CARET,
        '$'    => self::DOLLAR,
        '%'    => self::PERCENTAGE,
        '~'    => self::S_TILDE,
        '|'    => self::S_PIPE,
        '_'    => self::S_UNDERSCORE,
        '='    => self::S_EQUAL,
        '+'    => self::S_PLUS,
        '¿'    => self::INVERT_QUESTIONMARK,
        '?'    => self::QUESTIONMARK,
        '#'    => self::NUMBER_SIGN,
        '¡'    => self::INVERT_EXCLAMATION,
    ];
109
110
    /**
     * Matches a run of characters that are neither symbols (\p{S}) nor in the
     * Unicode "other" category (\p{C}; \p{Cc} is listed again explicitly).
     * isInvalidChar() treats a value in which this finds nothing as invalid.
     */
    public const INVALID_CHARS_REGEX = "/[^\p{S}\p{C}\p{Cc}]+/iu";

    /**
     * Matches one or more control characters (\p{Cc}); used by isUTF8Invalid().
     */
    public const VALID_UTF8_REGEX = '/\p{Cc}+/u';

    /**
     * Patterns handed to the parent lexer by getCatchablePatterns().
     */
    public const CATCHABLE_PATTERNS = [
        '[a-zA-Z]+[46]?', //ASCII and domain literal
        '[^\x00-\x7F]',  //UTF-8
        '[0-9]+',
        '\r\n',
        '::',
        '\s+?',
        '.',
    ];

    /**
     * Patterns handed to the parent lexer by getNonCatchablePatterns().
     */
    public const NON_CATCHABLE_PATTERNS = [
        '[\xA0-\xff]+',
    ];

    /**
     * Regex modifiers handed to the parent lexer by getModifiers().
     */
    public const MODIFIERS = 'iu';
129
130
    /**
     * Set to true by getType() when it classifies a token as INVALID;
     * cleared again by reset().
     *
     * @var bool
     */
    protected $hasInvalidTokens = false;

    /**
     * The token that was current before the latest moveNext() call.
     *
     * @var array
     *
     * @psalm-suppress UndefinedDocblockClass
     * @psalm-var array{value:string, type:null|int, position:int}|array<empty, empty>|\Doctrine\Common\Lexer\Token|null
     */
    protected $previous = [];

    /**
     * Shared placeholder token (empty value, null type, position 0), built
     * once by the constructor and reused by every instance.
     *
     * @var array
     *
     * @psalm-suppress UndefinedDocblockClass
     * @psalm-var array{value:string, type:null|int, position:int}|array<empty, empty>|\Doctrine\Common\Lexer\Token|null
     */
    private static $nullToken;

    /**
     * Token values concatenated by moveNext() while recording is enabled.
     *
     * @var string
     */
    private $accumulator = '';

    /**
     * Whether moveNext() should append token values to the accumulator.
     *
     * @var bool
     */
    private $hasToRecord = false;
154
155
    /**
     * Builds the shared null token on first use and starts this lexer with it
     * as both the current and the previous token, with no lookahead.
     *
     * @psalm-suppress InvalidPropertyAssignmentValue
     * @psalm-suppress PropertyTypeCoercion
     */
    public function __construct()
    {
        // The placeholder is static, so only the first instance creates it.
        if (self::$nullToken === null) {
            $emptyToken = [
                'value' => '',
                'type' => null,
                'position' => 0,
            ];
            self::$nullToken = $this->denormalizeToken($emptyToken);
        }

        $this->token = self::$nullToken;
        $this->previous = self::$nullToken;
        $this->lookahead = null;
    }
172
173
    /**
     * Clears the invalid-token flag, resets the parent lexer and points both
     * the current and the previous token back at the shared null token.
     *
     * The recording flag and the accumulator are left untouched.
     *
     * @psalm-suppress InvalidPropertyAssignmentValue
     * @psalm-suppress PropertyTypeCoercion
     */
    public function reset() : void
    {
        $this->hasInvalidTokens = false;

        parent::reset();

        $this->token = self::$nullToken;
        $this->previous = self::$nullToken;
    }
183
184
    /**
     * Checks whether a token of the given type lies ahead, without moving
     * this lexer: the search runs on a clone.
     *
     * @param int $type token type constant to look for
     *
     * @throws \UnexpectedValueException when the clone has no lookahead left
     *                                   after skipping towards $type
     *
     * @return bool always true; a miss is reported through the exception
     *
     * @psalm-suppress InvalidScalarArgument
     */
    public function find($type) : bool
    {
        $probe = clone $this;
        $probe->skipUntil($type);

        if ($probe->lookahead) {
            return true;
        }

        throw new \UnexpectedValueException($type . ' not found');
    }
201
202
    /**
     * Advances to the next token, remembering the token being left behind as
     * the previous one and, while recording is on, appending token values to
     * the accumulator.
     *
     * @psalm-suppress InvalidPropertyAssignmentValue
     * @psalm-suppress PropertyTypeCoercion
     *
     * @return bool the result of the parent lexer's moveNext()
     */
    public function moveNext() : bool
    {
        // While the previous token is still the null token, the value of the
        // current token is recorded before it is replaced.
        if ($this->hasToRecord && self::$nullToken === $this->previous) {
            $this->accumulator .= $this->getToken()['value'];
        }

        $this->previous = $this->token;

        // Hand the parent the null token in place of a missing lookahead.
        if (null === $this->lookahead) {
            $this->lookahead = self::$nullToken;
        }

        $advanced = parent::moveNext();

        if ($this->hasToRecord) {
            $this->accumulator .= $this->getToken()['value'];
        }

        return $advanced;
    }
229
230
    /**
     * Classifies a raw token value into one of the token type constants.
     *
     * A value not detected as UTF-8 is first converted from Windows-1252.
     * The checks then run in this order: known entry of the character table,
     * NUL byte, invalid character (which also raises the invalid-token flag),
     * and finally GENERIC for everything else.
     *
     * The value is taken by reference to match the signature but is not
     * modified here.
     *
     * @param string $value raw token text
     *
     * @return int one of the token type constants
     */
    protected function getType(&$value)
    {
        $isUtf8 = mb_detect_encoding($value, 'auto', true) === 'UTF-8';
        $encoded = $isUtf8
            ? $value
            : mb_convert_encoding($value, 'UTF-8', 'Windows-1252');

        if ($this->isValid($encoded)) {
            return $this->charValue[$encoded];
        }

        if ($this->isNullType($encoded)) {
            return self::C_NUL;
        }

        if (!$this->isInvalidChar($encoded)) {
            return self::GENERIC;
        }

        $this->hasInvalidTokens = true;

        return self::INVALID;
    }
261
262 300
    /**
     * Tells whether the value has a non-null entry in the character table.
     *
     * @param string $value token text to look up
     */
    protected function isValid(string $value) : bool
    {
        // isset() is intentional: an entry mapped to null counts as unknown.
        $isKnown = isset($this->charValue[$value]);

        return $isKnown;
    }
266
267 262
    /**
     * Tells whether the value is exactly one NUL byte.
     *
     * @param string $value token text to test
     */
    protected function isNullType(string $value) : bool
    {
        return "\0" === $value;
    }
271
272 261
    /**
     * Tells whether the value should be treated as an invalid token.
     *
     * That is the case when INVALID_CHARS_REGEX finds no match in it, and also
     * when preg_match() fails and returns false.
     *
     * @param string $value token text to test
     */
    protected function isInvalidChar(string $value) : bool
    {
        $matched = preg_match(self::INVALID_CHARS_REGEX, $value);

        // preg_match() yields 1 on a match, 0 on no match and false on error.
        return $matched !== 1;
    }
276
277
    /**
     * Tells whether the value contains at least one control character
     * (a match for VALID_UTF8_REGEX).
     *
     * preg_match() returns 1 on a match, 0 on no match and false on error.
     * Comparing the result against false would report true for every value
     * the regex could evaluate, so the check is for an actual match.
     *
     * @param string $value token text to test
     *
     * @return bool true only when a control character was found; false when
     *              none was found or when preg_match() failed
     */
    protected function isUTF8Invalid(string $value) : bool
    {
        return preg_match(self::VALID_UTF8_REGEX, $value) === 1;
    }
281
282 191
    /**
     * Reports whether getType() has classified any token as INVALID since
     * construction or the last reset().
     */
    public function hasInvalidTokens() : bool
    {
        $flag = $this->hasInvalidTokens;

        return $flag;
    }
286
287 178
    /**
     * Returns the previous token in array form.
     *
     * @return array the previous token as returned by normalizeToken()
     */
    public function getPrevious() : array
    {
        $previous = $this->previous;

        return $this->normalizeToken($previous);
    }
291
292 299
    /**
     * Returns the current token in array form.
     *
     * @return array the current token as returned by normalizeToken()
     */
    public function getToken() : array
    {
        $current = $this->token;

        return $this->normalizeToken($current);
    }
296
297
    /**
     * Converts a token into the representation matching the installed lexer.
     *
     * When the Doctrine Token class exists, a Token instance is returned:
     * either the given one or a new one built from the 'value', 'type' and
     * 'position' entries. Otherwise an array token is returned unchanged.
     *
     * @psalm-suppress UndefinedDocblockClass
     *
     * @param array|\ArrayAccess|\Doctrine\Common\Lexer\Token $token
     *
     * @throws \LogicException when the token is of none of the supported types
     *
     * @return array|\Doctrine\Common\Lexer\Token
     */
    private function denormalizeToken($token)
    {
        $isArray = is_array($token);

        if (class_exists(\Doctrine\Common\Lexer\Token::class)) {
            if ($token instanceof \Doctrine\Common\Lexer\Token) {
                return $token;
            }

            if ($isArray || $token instanceof \ArrayAccess) {
                return new \Doctrine\Common\Lexer\Token(
                    $token['value'],
                    $token['type'],
                    $token['position']
                );
            }
        }

        if (!$isArray) {
            throw new \LogicException(sprintf('unsupported type of token "%s"', get_debug_type($token)));
        }

        return $token;
    }
321
322
    /**
     * Converts a token into array form.
     *
     * An array token is returned unchanged. A Doctrine Token is unpacked into
     * 'value', 'type' and 'position' entries.
     *
     * @psalm-suppress UndefinedClass
     * @psalm-suppress UndefinedDocblockClass
     * @psalm-param array{position: int, type: int|null|string, value: int|string}|array{position?: int, type?: int|null, value?: string}|\Doctrine\Common\Lexer\Token|null $token
     *
     * @throws \LogicException when the token is neither an array nor a Token
     */
    private function normalizeToken($token): array
    {
        if (!is_array($token)) {
            $isTokenObject = class_exists(\Doctrine\Common\Lexer\Token::class)
                && $token instanceof \Doctrine\Common\Lexer\Token;

            if (!$isTokenObject) {
                throw new \LogicException(sprintf('unsupported type of token "%s"', get_debug_type($token)));
            }

            return [
                'value' => $token->value,
                'type' => $token->type,
                'position' => $token->position,
            ];
        }

        return $token;
    }
343
344
    /**
     * Supplies the catchable lexical patterns (CATCHABLE_PATTERNS).
     *
     * @return string[]
     */
    protected function getCatchablePatterns() : array
    {
        $patterns = self::CATCHABLE_PATTERNS;

        return $patterns;
    }
353
354
    /**
     * Supplies the non-catchable lexical patterns (NON_CATCHABLE_PATTERNS).
     *
     * @return string[]
     */
    protected function getNonCatchablePatterns() : array
    {
        $patterns = self::NON_CATCHABLE_PATTERNS;

        return $patterns;
    }
363
364 301
    /**
     * Supplies the regex modifiers (MODIFIERS).
     */
    protected function getModifiers() : string
    {
        $modifiers = self::MODIFIERS;

        return $modifiers;
    }
368
369 151
    /**
     * Returns the token values accumulated so far while recording.
     */
    public function getAccumulatedValues() : string
    {
        $recorded = $this->accumulator;

        return $recorded;
    }
373
374 188
    /**
     * Turns recording on: subsequent moveNext() calls append token values to
     * the accumulator.
     */
    public function startRecording() : void
    {
        $this->hasToRecord = true;
    }
378
379 148
    /**
     * Turns recording off; values already accumulated are kept.
     */
    public function stopRecording() : void
    {
        $this->hasToRecord = false;
    }
383
384 149
    /**
     * Empties the accumulator; the recording flag is not changed.
     */
    public function clearRecorded() : void
    {
        $this->accumulator = '';
    }
388
}
389