Utf8TokenMatcher   F
last analyzed

Complexity

Total Complexity 60

Size/Duplication

Total Lines 265
Duplicated Lines 0 %

Importance

Changes 3
Bugs 0 Features 0
Metric Value
eloc 196
dl 0
loc 265
rs 3.6
c 3
b 0
f 0
wmc 60

1 Method

Rating   Name   Duplication   Size   Complexity  
F match() 0 263 60

How to fix   Complexity   

Complex Class

Complex classes like Utf8TokenMatcher often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use Utf8TokenMatcher, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
/**
4
 * Unicode UTF-8 token matcher.
5
 *
6
 * Auto-generated file, please don't edit manually.
7
 * Generated by UniLex.
8
 */
9
10
declare(strict_types=1);
11
12
namespace Remorhaz\UniLex\Unicode\Grammar;
13
14
use Remorhaz\UniLex\IO\CharBufferInterface;
15
use Remorhaz\UniLex\Lexer\TokenFactoryInterface;
16
use Remorhaz\UniLex\Lexer\TokenMatcherTemplate;
17
18
class Utf8TokenMatcher extends TokenMatcherTemplate
{
    /**
     * Tries to recognize one UTF-8 encoded sequence starting at the current buffer position.
     *
     * The method is a goto-driven state machine. The first byte selects the sequence length
     * (1 to 6 bytes, see the pattern comment in each accepting state); every following byte
     * must lie in the continuation range 0x80-0xBF. Each accepted byte is consumed with
     * nextSymbol() before moving to the next state.
     *
     * Outcomes:
     *  - a complete sequence sets a TokenType::SYMBOL token whose TokenAttribute::UNICODE_CHAR
     *    attribute holds the value assembled from the payload bits, and returns true;
     *  - any mismatch jumps to the error state, which (if the buffer is not at its end)
     *    consumes one more symbol, sets a TokenType::INVALID_BYTES token and returns true;
     *  - reaching the error state with the buffer at its end returns false.
     *
     * NOTE(review): the file header says this class is generated by UniLex, so the 6-byte
     * mask fix below presumably also has to be made in the generator input, otherwise a
     * regeneration would revert it - confirm against the grammar source.
     *
     * @param CharBufferInterface   $buffer       Buffer the bytes are read from.
     * @param TokenFactoryInterface $tokenFactory Factory handed to the matching context.
     *
     * @return bool True if a token (SYMBOL or INVALID_BYTES) was set, false if matching
     *              failed with the buffer at its end.
     */
    public function match(CharBufferInterface $buffer, TokenFactoryInterface $tokenFactory): bool
    {
        $context = $this->createContext($buffer, $tokenFactory);
        goto state1;

        // Initial state: classify the lead byte.
        state1:
        if ($context->getBuffer()->isEnd()) {
            goto error;
        }
        $char = $context->getBuffer()->getSymbol();
        if (0x00 <= $char && $char <= 0x7F) {
            $context->getBuffer()->nextSymbol();
            // [\x00-\x7F]
            $context
                ->setNewToken(TokenType::SYMBOL)
                ->setTokenAttribute(TokenAttribute::UNICODE_CHAR, $char);

            return true;
        }
        if (0xC0 <= $char && $char <= 0xDF) {
            // Lead byte of a 2-byte sequence.
            $context->getBuffer()->nextSymbol();
            goto state3;
        }
        if (0xE0 <= $char && $char <= 0xEF) {
            // Lead byte of a 3-byte sequence.
            $context->getBuffer()->nextSymbol();
            goto state4;
        }
        if (0xF0 <= $char && $char <= 0xF7) {
            // Lead byte of a 4-byte sequence.
            $context->getBuffer()->nextSymbol();
            goto state5;
        }
        if (0xF8 <= $char && $char <= 0xFB) {
            // Lead byte of a 5-byte sequence.
            $context->getBuffer()->nextSymbol();
            goto state6;
        }
        if (0xFC == $char || 0xFD == $char) {
            // Lead byte of a 6-byte sequence.
            $context->getBuffer()->nextSymbol();
            goto state7;
        }
        goto error;

        // 2-byte sequence: expects the single (final) continuation byte.
        state3:
        if ($context->getBuffer()->isEnd()) {
            goto error;
        }
        $char = $context->getBuffer()->getSymbol();
        if (0x80 <= $char && $char <= 0xBF) {
            $context->getBuffer()->nextSymbol();
            // [\xC0-\xDF][\x80-\xBF]
            // Lead byte contributes 5 payload bits, the continuation byte 6.
            $charList = array_slice($context->getSymbolList(), -2);
            $symbol = ($charList[0] & 0x1F) << 6;
            $symbol |= ($charList[1] & 0x3F);
            $context
                ->setNewToken(TokenType::SYMBOL)
                ->setTokenAttribute(TokenAttribute::UNICODE_CHAR, $symbol);

            return true;
        }
        goto error;

        // 3-byte sequence: expects the 1st of 2 continuation bytes.
        state4:
        if ($context->getBuffer()->isEnd()) {
            goto error;
        }
        $char = $context->getBuffer()->getSymbol();
        if (0x80 <= $char && $char <= 0xBF) {
            $context->getBuffer()->nextSymbol();
            goto state20;
        }
        goto error;

        // 4-byte sequence: expects the 1st of 3 continuation bytes.
        state5:
        if ($context->getBuffer()->isEnd()) {
            goto error;
        }
        $char = $context->getBuffer()->getSymbol();
        if (0x80 <= $char && $char <= 0xBF) {
            $context->getBuffer()->nextSymbol();
            goto state17;
        }
        goto error;

        // 5-byte sequence: expects the 1st of 4 continuation bytes.
        state6:
        if ($context->getBuffer()->isEnd()) {
            goto error;
        }
        $char = $context->getBuffer()->getSymbol();
        if (0x80 <= $char && $char <= 0xBF) {
            $context->getBuffer()->nextSymbol();
            goto state13;
        }
        goto error;

        // 6-byte sequence: expects the 1st of 5 continuation bytes.
        state7:
        if ($context->getBuffer()->isEnd()) {
            goto error;
        }
        $char = $context->getBuffer()->getSymbol();
        if (0x80 <= $char && $char <= 0xBF) {
            $context->getBuffer()->nextSymbol();
            goto state8;
        }
        goto error;

        // 6-byte sequence: expects the 2nd of 5 continuation bytes.
        state8:
        if ($context->getBuffer()->isEnd()) {
            goto error;
        }
        $char = $context->getBuffer()->getSymbol();
        if (0x80 <= $char && $char <= 0xBF) {
            $context->getBuffer()->nextSymbol();
            goto state9;
        }
        goto error;

        // 6-byte sequence: expects the 3rd of 5 continuation bytes.
        state9:
        if ($context->getBuffer()->isEnd()) {
            goto error;
        }
        $char = $context->getBuffer()->getSymbol();
        if (0x80 <= $char && $char <= 0xBF) {
            $context->getBuffer()->nextSymbol();
            goto state10;
        }
        goto error;

        // 6-byte sequence: expects the 4th of 5 continuation bytes.
        state10:
        if ($context->getBuffer()->isEnd()) {
            goto error;
        }
        $char = $context->getBuffer()->getSymbol();
        if (0x80 <= $char && $char <= 0xBF) {
            $context->getBuffer()->nextSymbol();
            goto state11;
        }
        goto error;

        // 6-byte sequence: expects the final continuation byte.
        state11:
        if ($context->getBuffer()->isEnd()) {
            goto error;
        }
        $char = $context->getBuffer()->getSymbol();
        if (0x80 <= $char && $char <= 0xBF) {
            $context->getBuffer()->nextSymbol();
            // [\xFC-\xFD][\x80-\xBF]{5}
            // Lead byte contributes 1 payload bit; each of the 5 continuation bytes
            // contributes 6. The first continuation byte was previously masked with
            // 0x03, which discarded bits 26-29 of the decoded value.
            $charList = array_slice($context->getSymbolList(), -6);
            $symbol = ($charList[0] & 0x01) << 30;
            $symbol |= ($charList[1] & 0x3F) << 24;
            $symbol |= ($charList[2] & 0x3F) << 18;
            $symbol |= ($charList[3] & 0x3F) << 12;
            $symbol |= ($charList[4] & 0x3F) << 6;
            $symbol |= ($charList[5] & 0x3F);
            $context
                ->setNewToken(TokenType::SYMBOL)
                ->setTokenAttribute(TokenAttribute::UNICODE_CHAR, $symbol);

            return true;
        }
        goto error;

        // 5-byte sequence: expects the 2nd of 4 continuation bytes.
        state13:
        if ($context->getBuffer()->isEnd()) {
            goto error;
        }
        $char = $context->getBuffer()->getSymbol();
        if (0x80 <= $char && $char <= 0xBF) {
            $context->getBuffer()->nextSymbol();
            goto state14;
        }
        goto error;

        // 5-byte sequence: expects the 3rd of 4 continuation bytes.
        state14:
        if ($context->getBuffer()->isEnd()) {
            goto error;
        }
        $char = $context->getBuffer()->getSymbol();
        if (0x80 <= $char && $char <= 0xBF) {
            $context->getBuffer()->nextSymbol();
            goto state15;
        }
        goto error;

        // 5-byte sequence: expects the final continuation byte.
        state15:
        if ($context->getBuffer()->isEnd()) {
            goto error;
        }
        $char = $context->getBuffer()->getSymbol();
        if (0x80 <= $char && $char <= 0xBF) {
            $context->getBuffer()->nextSymbol();
            // [\xF8-\xFB][\x80-\xBF]{4}
            // Lead byte contributes 2 payload bits, each continuation byte 6.
            $charList = array_slice($context->getSymbolList(), -5);
            $symbol = ($charList[0] & 0x03) << 24;
            $symbol |= ($charList[1] & 0x3F) << 18;
            $symbol |= ($charList[2] & 0x3F) << 12;
            $symbol |= ($charList[3] & 0x3F) << 6;
            $symbol |= ($charList[4] & 0x3F);
            $context
                ->setNewToken(TokenType::SYMBOL)
                ->setTokenAttribute(TokenAttribute::UNICODE_CHAR, $symbol);

            return true;
        }
        goto error;

        // 4-byte sequence: expects the 2nd of 3 continuation bytes.
        state17:
        if ($context->getBuffer()->isEnd()) {
            goto error;
        }
        $char = $context->getBuffer()->getSymbol();
        if (0x80 <= $char && $char <= 0xBF) {
            $context->getBuffer()->nextSymbol();
            goto state18;
        }
        goto error;

        // 4-byte sequence: expects the final continuation byte.
        state18:
        if ($context->getBuffer()->isEnd()) {
            goto error;
        }
        $char = $context->getBuffer()->getSymbol();
        if (0x80 <= $char && $char <= 0xBF) {
            $context->getBuffer()->nextSymbol();
            // [\xF0-\xF7][\x80-\xBF]{3}
            // Lead byte contributes 3 payload bits, each continuation byte 6.
            $charList = array_slice($context->getSymbolList(), -4);
            $symbol = ($charList[0] & 0x07) << 18;
            $symbol |= ($charList[1] & 0x3F) << 12;
            $symbol |= ($charList[2] & 0x3F) << 6;
            $symbol |= ($charList[3] & 0x3F);
            $context
                ->setNewToken(TokenType::SYMBOL)
                ->setTokenAttribute(TokenAttribute::UNICODE_CHAR, $symbol);

            return true;
        }
        goto error;

        // 3-byte sequence: expects the final continuation byte.
        state20:
        if ($context->getBuffer()->isEnd()) {
            goto error;
        }
        $char = $context->getBuffer()->getSymbol();
        if (0x80 <= $char && $char <= 0xBF) {
            $context->getBuffer()->nextSymbol();
            // [\xE0-\xEF][\x80-\xBF]{2}
            // Lead byte contributes 4 payload bits, each continuation byte 6.
            $charList = array_slice($context->getSymbolList(), -3);
            $symbol = ($charList[0] & 0x0F) << 12;
            $symbol |= ($charList[1] & 0x3F) << 6;
            $symbol |= ($charList[2] & 0x3F);
            $context
                ->setNewToken(TokenType::SYMBOL)
                ->setTokenAttribute(TokenAttribute::UNICODE_CHAR, $symbol);

            return true;
        }
        goto error;

        // Failure path shared by all states. Bytes consumed before the mismatch stay
        // consumed; if anything is left, the offending symbol is consumed as well and
        // the result is reported as an INVALID_BYTES token.
        error:
        if ($context->getBuffer()->isEnd()) {
            return false;
        }
        $context->getBuffer()->nextSymbol();
        $context->setNewToken(TokenType::INVALID_BYTES);
        return true;
    }
}
285