Completed
Pull Request — master (#4)
by Nico
02:45 queued 31s
created

Tokenizer   A

Complexity

Total Complexity 15

Size/Duplication

Total Lines 151
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 1

Test Coverage

Coverage 97.18%

Importance

Changes 0
Metric Value
wmc 15
lcom 1
cbo 1
dl 0
loc 151
ccs 69
cts 71
cp 0.9718
rs 10
c 0
b 0
f 0

7 Methods

Rating   Name   Duplication   Size   Complexity  
A getQueue() 0 10 2
B __construct() 0 32 1
A tokenize() 0 22 2
A registerToken() 0 10 1
A getMatchedToken() 0 10 4
A getRegex() 0 15 4
A getPriority() 0 4 1
1
<?php
2
3
/**
4
 * @license     http://opensource.org/licenses/mit-license.php MIT
5
 * @link        https://github.com/nicoSWD
6
 * @author      Nicolas Oelgart <[email protected]>
7
 */
8
declare(strict_types=1);
9
10
namespace nicoSWD\Rules;
11
12
use SplPriorityQueue;
13
use stdClass;
14
15
/**
 * Regex-driven tokenizer.
 *
 * Every token is described by a name, a PCRE fragment and a priority. All
 * registered fragments are combined into a single alternation of named
 * capture groups, which is then matched repeatedly against the input, each
 * time starting where the previous match ended.
 */
final class Tokenizer implements TokenizerInterface
{
    const TOKEN_AND = 'And';
    const TOKEN_OR = 'Or';
    const TOKEN_NOT_EQUAL_STRICT = 'NotEqualStrict';
    const TOKEN_NOT_EQUAL = 'NotEqual';
    const TOKEN_EQUAL_STRICT = 'EqualStrict';
    const TOKEN_EQUAL = 'Equal';
    const TOKEN_IN = 'In';
    const TOKEN_BOOL = 'Bool';
    const TOKEN_NULL = 'Null';
    const TOKEN_METHOD = 'Method';
    const TOKEN_FUNCTION = 'Function';
    const TOKEN_VARIABLE = 'Variable';
    const TOKEN_FLOAT = 'Float';
    const TOKEN_INTEGER = 'Integer';
    const TOKEN_ENCAPSED_STRING = 'EncapsedString';
    const TOKEN_SMALLER_EQUAL = 'SmallerEqual';
    const TOKEN_GREATER_EQUAL = 'GreaterEqual';
    const TOKEN_SMALLER = 'Smaller';
    const TOKEN_GREATER = 'Greater';
    const TOKEN_OPENING_PARENTHESIS = 'OpeningParentheses';
    const TOKEN_CLOSING_PARENTHESIS = 'ClosingParentheses';
    const TOKEN_OPENING_ARRAY = 'OpeningArray';
    const TOKEN_CLOSING_ARRAY = 'ClosingArray';
    const TOKEN_COMMA = 'Comma';
    const TOKEN_REGEX = 'Regex';
    const TOKEN_COMMENT = 'Comment';
    const TOKEN_NEWLINE = 'Newline';
    const TOKEN_SPACE = 'Space';
    const TOKEN_UNKNOWN = 'Unknown';

    /**
     * Registered token definitions, keyed by token name.
     *
     * Each entry is a stdClass carrying `class`, `regex` and `priority`.
     *
     * @var stdClass[]
     */
    private $internalTokens = [];

    /**
     * Cached combined pattern produced by getRegex().
     *
     * @var string
     */
    private $regex = '';

    /**
     * Raised by registerToken() so the cached pattern is rebuilt on next use.
     *
     * @var bool
     */
    private $regexRequiresReassembly = false;

    /**
     * Registers the built-in tokens.
     *
     * The third argument is the priority: fragments with a higher priority
     * are placed earlier in the combined alternation and therefore win over
     * lower-priority fragments that could match at the same position (for
     * example `!==` before `!=`, and the single-character catch-all last).
     */
    public function __construct()
    {
        $this->registerToken(self::TOKEN_AND, '&&', 145);
        $this->registerToken(self::TOKEN_OR, '\|\|', 140);
        $this->registerToken(self::TOKEN_NOT_EQUAL_STRICT, '!==', 135);
        $this->registerToken(self::TOKEN_NOT_EQUAL, '<>|!=', 130);
        $this->registerToken(self::TOKEN_EQUAL_STRICT, '===', 125);
        $this->registerToken(self::TOKEN_EQUAL, '==', 120);
        $this->registerToken(self::TOKEN_IN, '\bin\b', 115);
        $this->registerToken(self::TOKEN_BOOL, '\b(?:true|false)\b', 110);
        $this->registerToken(self::TOKEN_NULL, '\bnull\b', 105);
        $this->registerToken(self::TOKEN_METHOD, '\.\s*[a-zA-Z_]\w*\s*\(', 100);
        $this->registerToken(self::TOKEN_FUNCTION, '[a-zA-Z_]\w*\s*\(', 95);
        $this->registerToken(self::TOKEN_FLOAT, '-?\d+(?:\.\d+)', 90);
        $this->registerToken(self::TOKEN_INTEGER, '-?\d+', 85);
        $this->registerToken(self::TOKEN_ENCAPSED_STRING, '"[^"]*"|\'[^\']*\'', 80);
        $this->registerToken(self::TOKEN_SMALLER_EQUAL, '<=', 75);
        $this->registerToken(self::TOKEN_GREATER_EQUAL, '>=', 70);
        $this->registerToken(self::TOKEN_SMALLER, '<', 65);
        $this->registerToken(self::TOKEN_GREATER, '>', 60);
        $this->registerToken(self::TOKEN_OPENING_PARENTHESIS, '\(', 55);
        $this->registerToken(self::TOKEN_CLOSING_PARENTHESIS, '\)', 50);
        $this->registerToken(self::TOKEN_OPENING_ARRAY, '\[', 45);
        $this->registerToken(self::TOKEN_CLOSING_ARRAY, '\]', 40);
        $this->registerToken(self::TOKEN_COMMA, ',', 35);
        $this->registerToken(self::TOKEN_REGEX, '/[^/\*].*/[igm]{0,3}', 30);
        $this->registerToken(self::TOKEN_COMMENT, '//[^\r\n]*|/\*.*?\*/', 25);
        $this->registerToken(self::TOKEN_NEWLINE, '\r?\n', 20);
        $this->registerToken(self::TOKEN_SPACE, '\s+', 15);
        $this->registerToken(self::TOKEN_VARIABLE, '[a-zA-Z_]\w*', 10);
        $this->registerToken(self::TOKEN_UNKNOWN, '.', 5);
    }

    /**
     * Splits the input into tokens and collects them on a new Stack.
     *
     * For every match, a class named `<this namespace>\Tokens\Token<Name>` is
     * instantiated with the matched text, the byte offset of the match and the
     * stack itself, and the instance is attached to the stack.
     *
     * @param string $string Text to tokenize.
     *
     * @return Stack Tokens in the order they appear in the input.
     *
     * @throws \RuntimeException When PCRE reports a failure, or when a
     *                           registered pattern matches the empty string
     *                           (the scan could not advance).
     */
    public function tokenize(string $string) : Stack
    {
        $stack = new Stack();
        $regex = $this->getRegex();
        $baseNameSpace = __NAMESPACE__ . '\\Tokens\\Token';
        $offset = 0;

        while (true) {
            $result = preg_match($regex, $string, $matches, 0, $offset);

            // false signals an engine error, not "end of input". Treating it
            // as the latter would silently hand back a truncated token list.
            if ($result === false) {
                throw new \RuntimeException(sprintf(
                    'Tokenization failed at offset %d (PCRE error code %d)',
                    $offset,
                    preg_last_error()
                ));
            }

            if ($result === 0) {
                break;
            }

            // A zero-length match would leave $offset unchanged and repeat
            // the very same match forever.
            if ($matches[0] === '') {
                throw new \RuntimeException(sprintf(
                    'A registered token pattern matched an empty string at offset %d',
                    $offset
                ));
            }

            $token = $this->getMatchedToken($matches);
            $className = $baseNameSpace . $token;

            $stack->attach(new $className(
                $matches[$token],
                $offset,
                $stack
            ));

            // preg_match() offsets are expressed in bytes, hence strlen().
            $offset += strlen($matches[0]);
        }

        return $stack;
    }

    /**
     * Adds a token definition, or replaces the one already stored under the
     * same name, and flags the combined pattern for reassembly.
     *
     * @param string   $class    Token name. Used as the named capture group in
     *                           the combined pattern and as the suffix of the
     *                           token class name built in tokenize().
     * @param string   $regex    PCRE fragment, without delimiters.
     * @param int|null $priority Position in the alternation (higher is tried
     *                           first). When omitted, the priority already
     *                           stored for this name is kept, or 10 is used
     *                           for a name that is not registered yet.
     */
    public function registerToken(string $class, string $regex, int $priority = null)
    {
        $token = new stdClass();
        $token->class = $class;
        $token->regex = $regex;
        $token->priority = $priority ?? $this->getPriority($class);

        $this->internalTokens[$class] = $token;
        $this->regexRequiresReassembly = true;
    }

    /**
     * Determines which token produced a match.
     *
     * @param array $matches Match array as filled in by preg_match().
     *
     * @return string Key of the first named (non-integer) entry holding a
     *                non-empty value, or TOKEN_UNKNOWN when there is none.
     */
    private function getMatchedToken(array $matches) : string
    {
        foreach ($matches as $key => $value) {
            if ($value !== '' && !is_int($key)) {
                return $key;
            }
        }

        return self::TOKEN_UNKNOWN;
    }

    /**
     * Returns the combined pattern, rebuilding it when it is still empty or a
     * token has been registered since the last build.
     *
     * Each token becomes a named group `(?<Name>fragment)`; the groups are
     * joined with `|` in priority order and wrapped in `~(...)~As`, i.e.
     * anchored at the match offset (`A`) with `.` also matching newlines (`s`).
     *
     * @return string
     */
    private function getRegex() : string
    {
        if (!$this->regex || $this->regexRequiresReassembly) {
            $regex = [];

            foreach ($this->getQueue() as $token) {
                $regex[] = "(?<$token->class>$token->regex)";
            }

            $this->regex = sprintf('~(%s)~As', implode('|', $regex));
            $this->regexRequiresReassembly = false;
        }

        return $this->regex;
    }

    /**
     * Builds a priority queue holding every registered token definition,
     * using each definition's `priority` as its queue priority.
     *
     * @return SplPriorityQueue
     */
    private function getQueue() : SplPriorityQueue
    {
        $queue = new SplPriorityQueue();

        foreach ($this->internalTokens as $class) {
            $queue->insert($class, $class->priority);
        }

        return $queue;
    }

    /**
     * Looks up the priority stored for a token name.
     *
     * @param string $class Token name.
     *
     * @return int The stored priority, or 10 when none is stored.
     */
    private function getPriority(string $class) : int
    {
        return $this->internalTokens[$class]->priority ?? 10;
    }
}
166