Tokenizer   A
last analyzed

Complexity

Total Complexity 3

Size/Duplication

Total Lines 106
Duplicated Lines 0 %

Test Coverage

Coverage 100%

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 22
c 1
b 0
f 0
dl 0
loc 106
ccs 12
cts 12
cp 1
rs 10
wmc 3

2 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 3 1
A tokenize() 0 11 2
1
<?php declare(strict_types = 1);
2
3
namespace Apicart\FQL\Tokenizer;
4
5
use Apicart\FQL\Contract\Tokenizer\TokenizerInterface;
6
use Apicart\FQL\Token\Token\Range;
7
use Apicart\FQL\Value\TokenSequence;
8
9
/**
 * Splits a query string into a sequence of tokens using the injected token extractor.
 *
 * The TOKEN_* constants identify token types; each one is a distinct power of two.
 */
final class Tokenizer implements TokenizerInterface
{

    /**
     * Represents the whitespace in the input string.
     */
    public const TOKEN_WHITESPACE = 1;

    /**
     * Combines two adjoining elements with logical AND.
     */
    public const TOKEN_LOGICAL_AND = 2;

    /**
     * Combines two adjoining elements with logical OR.
     */
    public const TOKEN_LOGICAL_OR = 4;

    /**
     * Applies logical NOT to the next (right-side) element.
     */
    public const TOKEN_LOGICAL_NOT = 8;

    /**
     * Applies logical NOT to the next (right-side) element.
     *
     * This is an alternative to TOKEN_LOGICAL_NOT, with the difference that the
     * parser expects it to be placed directly to the left of the element it
     * applies to, without any whitespace in between.
     */
    public const TOKEN_LOGICAL_NOT_2 = 16;

    /**
     * Mandatory operator applies to the next (right-side) element and means
     * that the element must be present. There must be no whitespace between it
     * and the element it applies to.
     */
    public const TOKEN_MANDATORY = 32;

    /**
     * Prohibited operator applies to the next (right-side) element and means
     * that the element must not be present. There must be no whitespace between
     * it and the element it applies to.
     */
    public const TOKEN_PROHIBITED = 64;

    /**
     * Left side delimiter of a group.
     *
     * Group is used to group elements in order to form a sub-query.
     *
     * @see GroupBegin
     */
    public const TOKEN_GROUP_BEGIN = 128;

    /**
     * Right side delimiter of a group.
     *
     * Group is used to group elements in order to form a sub-query.
     */
    public const TOKEN_GROUP_END = 256;

    /**
     * Term token type represents a category of term type tokens.
     *
     * This type is intended to be used as an extension point through subtyping.
     *
     * @see Phrase
     * @see Tag
     * @see User
     * @see Word
     * @see Range
     */
    public const TOKEN_TERM = 512;

    /**
     * Bailout token.
     *
     * If token could not be recognized, next character is extracted into a
     * token of this type. Ignored by parser.
     */
    public const TOKEN_BAILOUT = 1024;

    /**
     * Extractor used to read one token at a time from the input string.
     *
     * @var AbstractTokenExtractor
     */
    private $tokenExtractor;


    /**
     * @param AbstractTokenExtractor $tokenExtractor Extractor that tokenize() delegates to.
     */
    public function __construct(AbstractTokenExtractor $tokenExtractor)
    {
        $this->tokenExtractor = $tokenExtractor;
    }


    /**
     * Converts the given string into a sequence of tokens.
     *
     * Starting at position 0, the extractor is asked for the token at the
     * current position; the position is then advanced by the character length
     * (mb_strlen) of that token's lexeme, until the end of the string is
     * reached. An empty string yields a sequence with no tokens.
     *
     * @param string $string Input to tokenize.
     *
     * @return TokenSequence The extracted tokens, in order, together with the source string.
     *
     * @throws \RuntimeException If the extractor returns a token with an empty lexeme.
     */
    public function tokenize(string $string): TokenSequence
    {
        $length = mb_strlen($string);
        $position = 0;
        $tokens = [];

        while ($position < $length) {
            $token = $this->tokenExtractor->extract($string, $position);
            $lexemeLength = mb_strlen($token->getLexeme());

            // A zero-length lexeme would leave $position unchanged, so the loop
            // would never terminate and $tokens would grow without bound.
            if ($lexemeLength === 0) {
                throw new \RuntimeException(
                    sprintf('Token extractor returned an empty lexeme at position %d', $position)
                );
            }

            $position += $lexemeLength;
            $tokens[] = $token;
        }

        return new TokenSequence($tokens, $string);
    }

}
118