Passed
Push — 3.x ( 017096...5f35e4 )
by Eduardo Gulias
02:27
created

EmailLexer::isUTF8Invalid()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 2

Importance

Changes 0
Metric Value
cc 1
eloc 1
nc 1
nop 1
dl 0
loc 3
ccs 0
cts 2
cp 0
crap 2
rs 10
c 0
b 0
f 0
1
<?php
2
3
namespace Egulias\EmailValidator;
4
5
use Doctrine\Common\Lexer\AbstractLexer;
6
use Doctrine\Common\Lexer\Token;
7
8
/**
9
 * @extends AbstractLexer<int, string>
10
 */
11
class EmailLexer extends AbstractLexer
12
{
13
    //ASCII values
14
    public const S_EMPTY            = null;
15
    public const C_NUL              = 0;
16
    public const S_HTAB             = 9;
17
    public const S_LF               = 10;
18
    public const S_CR               = 13;
19
    public const S_SP               = 32;
20
    public const EXCLAMATION        = 33;
21
    public const S_DQUOTE           = 34;
22
    public const NUMBER_SIGN        = 35;
23
    public const DOLLAR             = 36;
24
    public const PERCENTAGE         = 37;
25
    public const AMPERSAND          = 38;
26
    public const S_SQUOTE           = 39;
27
    public const S_OPENPARENTHESIS  = 40;
28
    public const S_CLOSEPARENTHESIS = 41;
29
    public const ASTERISK           = 42;
30
    public const S_PLUS             = 43;
31
    public const S_COMMA            = 44;
32
    public const S_HYPHEN           = 45;
33
    public const S_DOT              = 46;
34
    public const S_SLASH            = 47;
35
    public const S_COLON            = 58;
36
    public const S_SEMICOLON        = 59;
37
    public const S_LOWERTHAN        = 60;
38
    public const S_EQUAL            = 61;
39
    public const S_GREATERTHAN      = 62;
40
    public const QUESTIONMARK       = 63;
41
    public const S_AT               = 64;
42
    public const S_OPENBRACKET      = 91;
43
    public const S_BACKSLASH        = 92;
44
    public const S_CLOSEBRACKET     = 93;
45
    public const CARET              = 94;
46
    public const S_UNDERSCORE       = 95;
47
    public const S_BACKTICK         = 96;
48
    public const S_OPENCURLYBRACES  = 123;
49
    public const S_PIPE             = 124;
50
    public const S_CLOSECURLYBRACES = 125;
51
    public const S_TILDE            = 126;
52
    public const C_DEL              = 127;
53
    public const INVERT_QUESTIONMARK= 168;
54
    public const INVERT_EXCLAMATION = 173;
55
    public const GENERIC            = 300;
56
    public const S_IPV6TAG          = 301;
57
    public const INVALID            = 302;
58
    public const CRLF               = 1310;
59
    public const S_DOUBLECOLON      = 5858;
60
    public const ASCII_INVALID_FROM = 127;
61
    public const ASCII_INVALID_TO   = 199;
62
63
    /**
     * Lookup table from a token's literal text to its token-type constant.
     *
     * Covers the US-ASCII visible characters not valid for atext
     * (@link http://tools.ietf.org/html/rfc5322#section-3.2.3) plus whitespace,
     * CRLF, the IPv6 tag and a few non-ASCII punctuation marks.
     *
     * The NUL entry is written with double quotes on purpose: in a
     * single-quoted PHP string, '\0' is the two characters backslash and zero,
     * not the NUL byte, so it would never correspond to a real NUL token.
     *
     * @var array
     */
    protected $charValue = [
        '{'    => self::S_OPENCURLYBRACES,
        '}'    => self::S_CLOSECURLYBRACES,
        '('    => self::S_OPENPARENTHESIS,
        ')'    => self::S_CLOSEPARENTHESIS,
        '<'    => self::S_LOWERTHAN,
        '>'    => self::S_GREATERTHAN,
        '['    => self::S_OPENBRACKET,
        ']'    => self::S_CLOSEBRACKET,
        ':'    => self::S_COLON,
        ';'    => self::S_SEMICOLON,
        '@'    => self::S_AT,
        '\\'   => self::S_BACKSLASH,
        '/'    => self::S_SLASH,
        ','    => self::S_COMMA,
        '.'    => self::S_DOT,
        "'"    => self::S_SQUOTE,
        "`"    => self::S_BACKTICK,
        '"'    => self::S_DQUOTE,
        '-'    => self::S_HYPHEN,
        '::'   => self::S_DOUBLECOLON,
        ' '    => self::S_SP,
        "\t"   => self::S_HTAB,
        "\r"   => self::S_CR,
        "\n"   => self::S_LF,
        "\r\n" => self::CRLF,
        'IPv6' => self::S_IPV6TAG,
        ''     => self::S_EMPTY,
        "\0"   => self::C_NUL,
        '*'    => self::ASTERISK,
        '!'    => self::EXCLAMATION,
        '&'    => self::AMPERSAND,
        '^'    => self::CARET,
        '$'    => self::DOLLAR,
        '%'    => self::PERCENTAGE,
        '~'    => self::S_TILDE,
        '|'    => self::S_PIPE,
        '_'    => self::S_UNDERSCORE,
        '='    => self::S_EQUAL,
        '+'    => self::S_PLUS,
        '¿'    => self::INVERT_QUESTIONMARK,
        '?'    => self::QUESTIONMARK,
        '#'    => self::NUMBER_SIGN,
        '¡'    => self::INVERT_EXCLAMATION,
    ];
113
114
    public const INVALID_CHARS_REGEX = "/[^\p{S}\p{C}\p{Cc}]+/iu";
115
116
    public const VALID_UTF8_REGEX = '/\p{Cc}+/u';
117
118
    public const CATCHABLE_PATTERNS = [
119
        '[a-zA-Z]+[46]?', //ASCII and domain literal
120
        '[^\x00-\x7F]',  //UTF-8
121
        '[0-9]+',
122
        '\r\n',
123
        '::',
124
        '\s+?',
125
        '.',
126
    ];
127
128
    public const NON_CATCHABLE_PATTERNS = [
129
        '[\xA0-\xff]+',
130
    ];
131
132
    public const MODIFIERS = 'iu';
133
134
    /** @var bool */
135
    protected $hasInvalidTokens = false;
136
137
    /**
138
     * @var array
139
     *
140
     * @psalm-var array{value:string, type:null|int, position:int}|array<empty, empty>
141
     */
142
    protected $previous = [];
143
144
    /**
145
     * The last matched/seen token.
146
     *
147
     * @var array|Token
148
     *
149
     * @psalm-suppress NonInvariantDocblockPropertyType
150
     * @psalm-var array{value:string, type:null|int, position:int}|Token<int, string>
151
     */
152
    public $token;
153
154
    /**
155
     * The next token in the input.
156
     *
157
     * @var array|Token|null
158
     *
159
     * @psalm-suppress NonInvariantDocblockPropertyType
160
     * @psalm-var array{position: int, type: int|null|string, value: int|string}|Token<int, string>|null
161
     */
162
    public $lookahead;
163
164
    /** @psalm-var array{value:'', type:null, position:0} */
165
    private static $nullToken = [
166
        'value' => '',
167
        'type' => null,
168
        'position' => 0,
169
    ];
170
171
    /** @var string */
172
    private $accumulator = '';
173
174
    /** @var bool */
175
    private $hasToRecord = false;
176
177 340
    /**
     * Starts the lexer with the empty placeholder token as both the current
     * and the previous token, and with no lookahead token.
     */
    public function __construct()
    {
        $this->lookahead = null;
        $this->token = self::$nullToken;
        $this->previous = self::$nullToken;
    }
182
183 308
    /**
     * Resets the parent lexer state, then restores this class's own state:
     * current and previous token become the empty placeholder token and the
     * invalid-token flag is cleared.
     */
    public function reset() : void
    {
        parent::reset();

        // Must follow parent::reset() so the placeholder is what remains in $token.
        $this->token = self::$nullToken;
        $this->previous = self::$nullToken;
        $this->hasInvalidTokens = false;
    }
189
190
    /**
     * Looks ahead for a token of the given type.
     *
     * The search runs on a clone of this lexer: the clone is asked to skip
     * until $type and its lookahead is then inspected.
     *
     * @param int $type token type to search for
     * @throws \UnexpectedValueException when the clone has no lookahead token after skipping
     * @return bool true whenever no exception is thrown
     *
     * @psalm-suppress InvalidScalarArgument
     */
    public function find($type) : bool
    {
        $probe = clone $this;
        $probe->skipUntil($type);

        if ($probe->lookahead) {
            return true;
        }

        throw new \UnexpectedValueException($type . ' not found');
    }
207
208
    /**
     * Advances to the next token, remembering the current one as "previous"
     * and, while recording is on, appending token values to the accumulator.
     *
     * @return bool the value returned by the parent lexer's moveNext()
     */
    public function moveNext() : bool
    {
        // While recording, the token held before the very first advance is
        // appended too (previous is still the placeholder at that point).
        if ($this->hasToRecord && $this->previous === self::$nullToken) {
            $this->accumulator .= $this->token['value'];
        }

        // $previous is always stored in array form, whatever shape $token has.
        if ($this->token instanceof Token) {
            $this->previous = [
                'value' => $this->token->value,
                'type' => $this->token->type,
                'position' => $this->token->position,
            ];
        } else {
            $this->previous = $this->token;
        }

        // Substitute the placeholder for a missing lookahead before delegating.
        if (null === $this->lookahead) {
            $this->lookahead = self::$nullToken;
        }

        $advanced = parent::moveNext();

        if ($this->hasToRecord) {
            $this->accumulator .= $this->token['value'];
        }

        return $advanced;
    }
235
236
    /**
     * Maps a token value to its token-type constant.
     *
     * The value is first normalised: unless strict detection reports UTF-8,
     * it is converted to UTF-8 assuming Windows-1252 input. The normalised
     * text is then classified, in order, as a known character (looked up in
     * $charValue), the NUL character, an invalid character (which also raises
     * the invalid-token flag), or a generic token.
     *
     * @param string $value token text; taken by reference but not modified here
     * @throws \InvalidArgumentException declared by the original contract; nothing in this body throws it directly
     * @return int
     */
    protected function getType(&$value)
    {
        $normalized = mb_detect_encoding($value, 'auto', true) === 'UTF-8'
            ? $value
            : mb_convert_encoding($value, 'UTF-8', 'Windows-1252');

        if ($this->isValid($normalized)) {
            return $this->charValue[$normalized];
        }

        if ($this->isNullType($normalized)) {
            return self::C_NUL;
        }

        if (!$this->isInvalidChar($normalized)) {
            return self::GENERIC;
        }

        $this->hasInvalidTokens = true;

        return self::INVALID;
    }
267
268 300
    /**
     * Tells whether the value has a non-null entry in the $charValue table.
     *
     * @param string $value
     * @return bool false for unknown keys and for keys mapped to null
     */
    protected function isValid(string $value) : bool
    {
        return ($this->charValue[$value] ?? null) !== null;
    }
272
273 262
    /**
     * Tells whether the value is exactly the single NUL character.
     *
     * @param string $value
     * @return bool
     */
    protected function isNullType(string $value) : bool
    {
        return "\0" === $value;
    }
277
278 261
    /**
     * Tells whether the value fails to match self::INVALID_CHARS_REGEX.
     *
     * @param string $value
     * @return bool true when preg_match() reports no match or a failure
     */
    protected function isInvalidChar(string $value) : bool
    {
        // preg_match() gives 1 only on a match; 0 and false both count as "invalid".
        return preg_match(self::INVALID_CHARS_REGEX, $value) !== 1;
    }
282
283
    /**
     * Tells whether the value contains at least one character matched by
     * self::VALID_UTF8_REGEX.
     *
     * preg_match() yields 1 on a match, 0 on no match and false on failure.
     * The result is compared with 1 rather than with false, because a
     * "!== false" test is also satisfied by 0 and would therefore report
     * true whether or not the pattern matched.
     *
     * @param string $value
     * @return bool true only when the pattern matched
     */
    protected function isUTF8Invalid(string $value) : bool
    {
        return preg_match(self::VALID_UTF8_REGEX, $value) === 1;
    }
287
288 191
    /**
     * Reports the current state of the invalid-token flag.
     *
     * @return bool
     */
    public function hasInvalidTokens() : bool
    {
        return $this->hasInvalidTokens;
    }
292
293
    /**
     * Returns the token stored as "previous".
     *
     * @return array
     */
    public function getPrevious() : array
    {
        return $this->previous;
    }
302
303
    /**
     * Returns the lexical catchable patterns (self::CATCHABLE_PATTERNS).
     *
     * @return string[]
     */
    protected function getCatchablePatterns() : array
    {
        return self::CATCHABLE_PATTERNS;
    }
312
313
    /**
     * Returns the lexical non-catchable patterns (self::NON_CATCHABLE_PATTERNS).
     *
     * @return string[]
     */
    protected function getNonCatchablePatterns() : array
    {
        return self::NON_CATCHABLE_PATTERNS;
    }
322
323 301
    /**
     * Returns the regex modifiers string (self::MODIFIERS).
     *
     * @return string
     */
    protected function getModifiers() : string
    {
        return self::MODIFIERS;
    }
327
328 151
    /**
     * Returns the text accumulated so far by recording.
     *
     * @return string
     */
    public function getAccumulatedValues() : string
    {
        return $this->accumulator;
    }
332
333 188
    /**
     * Turns recording on; moveNext() appends token values to the
     * accumulator while the flag is set.
     */
    public function startRecording() : void
    {
        $this->hasToRecord = true;
    }
337
338 148
    /**
     * Turns recording off; the accumulated text is left as it is.
     */
    public function stopRecording() : void
    {
        $this->hasToRecord = false;
    }
342
343 149
    /**
     * Empties the accumulator; the recording flag is not changed.
     */
    public function clearRecorded() : void
    {
        $this->accumulator = '';
    }
347
}
348