Completed
Push — master ( 73afad...15a795 )
by Nico
04:02
created

Tokenizer   A

Complexity

Total Complexity 15

Size/Duplication

Total Lines 151
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 1

Test Coverage

Coverage 97.26%

Importance

Changes 0
Metric Value
wmc 15
lcom 1
cbo 1
dl 0
loc 151
ccs 71
cts 73
cp 0.9726
rs 10
c 0
b 0
f 0

7 Methods

Rating   Name   Duplication   Size   Complexity  
A getQueue() 0 10 2
B __construct() 0 32 1
A registerToken() 0 10 1
A getMatchedToken() 0 10 4
A getRegex() 0 15 4
A getPriority() 0 4 1
A tokenize() 0 22 2
1
<?php
2
3
/**
 * @license     http://opensource.org/licenses/mit-license.php MIT
 * @link        https://github.com/nicoSWD
 * @author      Nicolas Oelgart <[email protected]>
 */
8
declare(strict_types=1);
9
10
namespace nicoSWD\Rules;
11
12
use SplPriorityQueue;
13
use stdClass;
14
15
/**
 * Regex-driven tokenizer.
 *
 * Every token is registered under a name, a PCRE fragment and a priority.
 * All fragments are combined into one alternation of named groups, ordered by
 * descending priority, and that pattern is applied repeatedly to the input,
 * each time anchored at the current byte offset.
 */
final class Tokenizer implements TokenizerInterface
{
    // Token names. Each name is used as the named group in the combined
    // pattern and as the suffix of the token class ("Tokens\Token<Name>")
    // instantiated by tokenize().
    const TOKEN_AND = 'And';
    const TOKEN_OR = 'Or';
    const TOKEN_NOT_EQUAL_STRICT = 'NotEqualStrict';
    const TOKEN_NOT_EQUAL = 'NotEqual';
    const TOKEN_EQUAL_STRICT = 'EqualStrict';
    const TOKEN_EQUAL = 'Equal';
    const TOKEN_IN = 'In';
    const TOKEN_BOOL = 'Bool';
    const TOKEN_NULL = 'Null';
    const TOKEN_METHOD = 'Method';
    const TOKEN_FUNCTION = 'Function';
    const TOKEN_VARIABLE = 'Variable';
    const TOKEN_FLOAT = 'Float';
    const TOKEN_INTEGER = 'Integer';
    const TOKEN_ENCAPSED_STRING = 'EncapsedString';
    const TOKEN_SMALLER_EQUAL = 'SmallerEqual';
    const TOKEN_GREATER_EQUAL = 'GreaterEqual';
    const TOKEN_SMALLER = 'Smaller';
    const TOKEN_GREATER = 'Greater';
    const TOKEN_OPENING_PARENTHESIS = 'OpeningParentheses';
    const TOKEN_CLOSING_PARENTHESIS = 'ClosingParentheses';
    const TOKEN_OPENING_ARRAY = 'OpeningArray';
    const TOKEN_CLOSING_ARRAY = 'ClosingArray';
    const TOKEN_COMMA = 'Comma';
    const TOKEN_REGEX = 'Regex';
    const TOKEN_COMMENT = 'Comment';
    const TOKEN_NEWLINE = 'Newline';
    const TOKEN_SPACE = 'Space';
    const TOKEN_UNKNOWN = 'Unknown';

    /**
     * Registered token definitions, keyed by token name.
     *
     * Each entry is a stdClass with the properties "class", "regex" and
     * "priority" (see registerToken()).
     *
     * @var stdClass[]
     */
    private $internalTokens = [];

    /**
     * Cached combined pattern; empty until getRegex() builds it.
     *
     * @var string
     */
    private $regex = '';

    /**
     * Set by registerToken() so that getRegex() rebuilds the cached pattern.
     *
     * @var bool
     */
    private $regexRequiresReassembly = false;

    /**
     * Registers the built-in tokens.
     *
     * Priorities decide the order of the alternatives in the combined
     * pattern, so longer operators must outrank their prefixes
     * ("!==" before "!=", "<=" before "<") and the single-character
     * catch-all TOKEN_UNKNOWN comes last.
     */
    public function __construct()
    {
        $this->registerToken(self::TOKEN_AND, '&&', 145);
        $this->registerToken(self::TOKEN_OR, '\|\|', 140);
        $this->registerToken(self::TOKEN_NOT_EQUAL_STRICT, '!==', 135);
        $this->registerToken(self::TOKEN_NOT_EQUAL, '<>|!=', 130);
        $this->registerToken(self::TOKEN_EQUAL_STRICT, '===', 125);
        $this->registerToken(self::TOKEN_EQUAL, '==', 120);
        $this->registerToken(self::TOKEN_IN, '\bin\b', 115);
        $this->registerToken(self::TOKEN_BOOL, '\b(?:true|false)\b', 110);
        $this->registerToken(self::TOKEN_NULL, '\bnull\b', 105);
        $this->registerToken(self::TOKEN_METHOD, '\.\s*[a-zA-Z_]\w*\s*\(', 100);
        $this->registerToken(self::TOKEN_FUNCTION, '[a-zA-Z_]\w*\s*\(', 95);
        $this->registerToken(self::TOKEN_FLOAT, '-?\d+(?:\.\d+)', 90);
        $this->registerToken(self::TOKEN_INTEGER, '-?\d+', 85);
        $this->registerToken(self::TOKEN_ENCAPSED_STRING, '"[^"]*"|\'[^\']*\'', 80);
        $this->registerToken(self::TOKEN_SMALLER_EQUAL, '<=', 75);
        $this->registerToken(self::TOKEN_GREATER_EQUAL, '>=', 70);
        $this->registerToken(self::TOKEN_SMALLER, '<', 65);
        $this->registerToken(self::TOKEN_GREATER, '>', 60);
        $this->registerToken(self::TOKEN_OPENING_PARENTHESIS, '\(', 55);
        $this->registerToken(self::TOKEN_CLOSING_PARENTHESIS, '\)', 50);
        $this->registerToken(self::TOKEN_OPENING_ARRAY, '\[', 45);
        $this->registerToken(self::TOKEN_CLOSING_ARRAY, '\]', 40);
        $this->registerToken(self::TOKEN_COMMA, ',', 35);
        $this->registerToken(self::TOKEN_REGEX, '/[^/\*].*/[igm]{0,3}', 30);
        $this->registerToken(self::TOKEN_COMMENT, '//[^\r\n]*|/\*.*?\*/', 25);
        $this->registerToken(self::TOKEN_NEWLINE, '\r?\n', 20);
        $this->registerToken(self::TOKEN_SPACE, '\s+', 15);
        $this->registerToken(self::TOKEN_VARIABLE, '[a-zA-Z_]\w*', 10);
        $this->registerToken(self::TOKEN_UNKNOWN, '.', 5);
    }

    /**
     * Splits the input into tokens and returns them on a Stack.
     *
     * @param string $string Input to tokenize.
     *
     * @return Stack Stack holding one token object per match, in input order.
     */
    public function tokenize(string $string) : Stack
    {
        $stack = new Stack();
        $regex = $this->getRegex();
        $baseNameSpace = __NAMESPACE__ . '\\Tokens\\Token';
        $offset = 0;

        // The pattern carries the "A" (anchored) modifier, so every match
        // starts exactly at $offset. The loop ends as soon as preg_match()
        // reports no match (0) or an error (false).
        while (preg_match($regex, $string, $matches, 0, $offset)) {
            $token = $this->getMatchedToken($matches);
            $className = $baseNameSpace . $token;

            $stack->attach(new $className(
                $matches[$token],
                $offset,
                $stack
            ));

            // preg_match() offsets are byte offsets, hence strlen().
            $offset += strlen($matches[0]);
        }

        return $stack;
    }

    /**
     * Registers a token, or replaces the one already stored under $class.
     *
     * @param string   $class    Token name; used as the regex group name and
     *                           as the suffix of the token class name.
     * @param string   $regex    PCRE fragment (no delimiters) matching the token.
     * @param int|null $priority Higher values are tried first. When null, the
     *                           priority of an already registered token with
     *                           the same name is kept, otherwise 10 is used.
     */
    public function registerToken(string $class, string $regex, int $priority = null)
    {
        $token = new stdClass();
        $token->class = $class;
        $token->regex = $regex;
        $token->priority = $priority ?? $this->getPriority($class);

        $this->internalTokens[$class] = $token;
        $this->regexRequiresReassembly = true;
    }

    /**
     * Finds the name of the group that produced the current match.
     *
     * preg_match() reports every named group twice (by name and by index);
     * groups that did not take part in the match hold an empty string.
     *
     * @param array $matches Match array as filled by preg_match().
     *
     * @return string Name of the first non-empty named group, or
     *                TOKEN_UNKNOWN if there is none.
     */
    private function getMatchedToken(array $matches) : string
    {
        foreach ($matches as $key => $value) {
            if ($value !== '' && !is_int($key)) {
                return $key;
            }
        }

        return self::TOKEN_UNKNOWN;
    }

    /**
     * Returns the combined pattern, rebuilding it when it has not been built
     * yet or a token was registered since the last build.
     *
     * Result shape: ~((?<Name1>regex1)|(?<Name2>regex2)|...)~As
     *
     * @return string Complete PCRE pattern including delimiters and modifiers.
     */
    private function getRegex() : string
    {
        if (!$this->regex || $this->regexRequiresReassembly) {
            $regex = [];

            foreach ($this->getQueue() as $token) {
                $regex[] = "(?<{$token->class}>{$token->regex})";
            }

            $this->regex = sprintf('~(%s)~As', implode('|', $regex));
            $this->regexRequiresReassembly = false;
        }

        return $this->regex;
    }

    /**
     * Builds a queue that yields the registered tokens by descending priority.
     *
     * SplPriorityQueue gives no ordering guarantee for elements of equal
     * priority. A decreasing serial number is therefore used as a secondary
     * key, so tokens sharing a priority come out in the order they were
     * first registered and the resulting pattern is deterministic.
     *
     * @return SplPriorityQueue Queue of token definitions (stdClass).
     */
    private function getQueue() : SplPriorityQueue
    {
        $queue = new SplPriorityQueue();
        $serial = PHP_INT_MAX;

        foreach ($this->internalTokens as $token) {
            // Arrays compare element by element: priority first, then serial.
            $queue->insert($token, [$token->priority, $serial--]);
        }

        return $queue;
    }

    /**
     * Returns the priority stored for a token name.
     *
     * @param string $class Token name.
     *
     * @return int Stored priority, or 10 when the token is not registered.
     */
    private function getPriority(string $class) : int
    {
        return $this->internalTokens[$class]->priority ?? 10;
    }
}
166