| Conditions | 36 |
| Paths | 112 |
| Total Lines | 208 |
| Code Lines | 162 |
| Lines | 0 |
| Ratio | 0 % |
| Changes | 1 | ||
| Bugs | 0 | Features | 0 |
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include:
- Extract Method
If many parameters/temporary variables are present:
- Replace Method with Method Object
| 1 | <?php |
||
/**
 * Tries to read one UTF-8 encoded symbol from the buffer and emit a token for it.
 *
 * The method is a goto-driven state machine:
 *  - state1 inspects the lead byte and picks the sequence length (1 to 6 bytes,
 *    i.e. the legacy 5- and 6-byte forms with lead bytes 0xF8-0xFD are accepted);
 *  - state7..state3 each consume one continuation byte (0x80-0xBF) and fall
 *    towards state2, so entering at stateN consumes N-2 continuation bytes;
 *  - state2 decodes the collected bytes into a code point and creates a
 *    TokenType::SYMBOL token carrying it as TokenAttribute::UNICODE_CHAR;
 *  - error skips one symbol and creates a TokenType::INVALID_BYTES token.
 *
 * NOTE(review): no check for overlong encodings, surrogates or code points above
 * U+10FFFF is performed here; confirm whether callers rely on that.
 *
 * @param CharBufferInterface   $buffer       Source of input symbols (byte values, as
 *                                            compared against 0x00-0xFD below).
 * @param TokenFactoryInterface $tokenFactory Passed to createContext() together with the buffer.
 *
 * @return bool True when a SYMBOL or INVALID_BYTES token was created; false when
 *              the error state is reached with the buffer already at its end.
 */
public function match(CharBufferInterface $buffer, TokenFactoryInterface $tokenFactory): bool
{
    $context = $this->createContext($buffer, $tokenFactory);
    // Register every pattern this matcher can recognise; the states below narrow
    // the set via allowRegExps() (presumably an intersection - verify in the context class).
    $context->setRegExps(
        '[\\x00-\\x7F]',
        '[\\xC0-\\xDF][\\x80-\\xBF]',
        '[\\xE0-\\xEF][\\x80-\\xBF]{2}',
        '[\\xF0-\\xF7][\\x80-\\xBF]{3}',
        '[\\xF8-\\xFB][\\x80-\\xBF]{4}',
        '[\\xFC-\\xFD][\\x80-\\xBF]{5}'
    );

    // Lead byte: its range decides how many continuation bytes must follow.
    state1:
    if ($context->getBuffer()->isEnd()) {
        goto error;
    }
    $char = $context->getBuffer()->getSymbol();
    if (0x00 <= $char && $char <= 0x7F) {
        $context->getBuffer()->nextSymbol();
        $context->allowRegExps('[\\x00-\\x7F]');
        goto state2;
    }
    if (0xC0 <= $char && $char <= 0xDF) {
        $context->getBuffer()->nextSymbol();
        $context->allowRegExps('[\\xC0-\\xDF][\\x80-\\xBF]');
        goto state3;
    }
    if (0xE0 <= $char && $char <= 0xEF) {
        $context->getBuffer()->nextSymbol();
        $context->allowRegExps('[\\xE0-\\xEF][\\x80-\\xBF]{2}');
        goto state4;
    }
    if (0xF0 <= $char && $char <= 0xF7) {
        $context->getBuffer()->nextSymbol();
        $context->allowRegExps('[\\xF0-\\xF7][\\x80-\\xBF]{3}');
        goto state5;
    }
    if (0xF8 <= $char && $char <= 0xFB) {
        $context->getBuffer()->nextSymbol();
        $context->allowRegExps('[\\xF8-\\xFB][\\x80-\\xBF]{4}');
        goto state6;
    }
    if (0xFC <= $char && $char <= 0xFD) {
        $context->getBuffer()->nextSymbol();
        $context->allowRegExps('[\\xFC-\\xFD][\\x80-\\xBF]{5}');
        goto state7;
    }
    // 0x80-0xBF (stray continuation byte) and 0xFE/0xFF cannot start a sequence.
    goto error;

    // Accepting state: build the code point from the bytes of the matched pattern.
    state2:
    switch ($context->getRegExp()) {
        case '[\\x00-\\x7F]':
            // Single byte: $char still holds the byte read in state1.
            $context
                ->setNewToken(TokenType::SYMBOL)
                ->setTokenAttribute(TokenAttribute::UNICODE_CHAR, $char);

            return true;

        case '[\\xC0-\\xDF][\\x80-\\xBF]':
            // 110xxxxx 10xxxxxx
            $charList = array_slice($context->getSymbolList(), -2);
            $symbol = ($charList[0] & 0x1F) << 6;
            $symbol |= ($charList[1] & 0x3F);
            $context
                ->setNewToken(TokenType::SYMBOL)
                ->setTokenAttribute(TokenAttribute::UNICODE_CHAR, $symbol);

            return true;

        case '[\\xE0-\\xEF][\\x80-\\xBF]{2}':
            // 1110xxxx 10xxxxxx 10xxxxxx
            $charList = array_slice($context->getSymbolList(), -3);
            $symbol = ($charList[0] & 0x0F) << 12;
            $symbol |= ($charList[1] & 0x3F) << 6;
            $symbol |= ($charList[2] & 0x3F);
            $context
                ->setNewToken(TokenType::SYMBOL)
                ->setTokenAttribute(TokenAttribute::UNICODE_CHAR, $symbol);

            return true;

        case '[\\xF0-\\xF7][\\x80-\\xBF]{3}':
            // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            $charList = array_slice($context->getSymbolList(), -4);
            $symbol = ($charList[0] & 0x07) << 18;
            $symbol |= ($charList[1] & 0x3F) << 12;
            $symbol |= ($charList[2] & 0x3F) << 6;
            $symbol |= ($charList[3] & 0x3F);
            $context
                ->setNewToken(TokenType::SYMBOL)
                ->setTokenAttribute(TokenAttribute::UNICODE_CHAR, $symbol);

            return true;

        case '[\\xF8-\\xFB][\\x80-\\xBF]{4}':
            // 111110xx followed by four 10xxxxxx bytes
            $charList = array_slice($context->getSymbolList(), -5);
            $symbol = ($charList[0] & 0x03) << 24;
            $symbol |= ($charList[1] & 0x3F) << 18;
            $symbol |= ($charList[2] & 0x3F) << 12;
            $symbol |= ($charList[3] & 0x3F) << 6;
            $symbol |= ($charList[4] & 0x3F);
            $context
                ->setNewToken(TokenType::SYMBOL)
                ->setTokenAttribute(TokenAttribute::UNICODE_CHAR, $symbol);

            return true;

        case '[\\xFC-\\xFD][\\x80-\\xBF]{5}':
            // 1111110x followed by five 10xxxxxx bytes
            $charList = array_slice($context->getSymbolList(), -6);
            $symbol = ($charList[0] & 0x01) << 30;
            // Every continuation byte carries six payload bits, so the mask is 0x3F
            // here as well (a 0x03 mask would silently drop bits 26-29).
            $symbol |= ($charList[1] & 0x3F) << 24;
            $symbol |= ($charList[2] & 0x3F) << 18;
            $symbol |= ($charList[3] & 0x3F) << 12;
            $symbol |= ($charList[4] & 0x3F) << 6;
            $symbol |= ($charList[5] & 0x3F);
            $context
                ->setNewToken(TokenType::SYMBOL)
                ->setTokenAttribute(TokenAttribute::UNICODE_CHAR, $symbol);

            return true;

        default:
            goto error;
    }

    // Last continuation byte of any multi-byte sequence.
    state3:
    if ($context->getBuffer()->isEnd()) {
        goto error;
    }
    $char = $context->getBuffer()->getSymbol();
    if (0x80 <= $char && $char <= 0xBF) {
        $context->getBuffer()->nextSymbol();
        $context->allowRegExps(
            '[\\xC0-\\xDF][\\x80-\\xBF]',
            '[\\xE0-\\xEF][\\x80-\\xBF]{2}',
            '[\\xF0-\\xF7][\\x80-\\xBF]{3}',
            '[\\xF8-\\xFB][\\x80-\\xBF]{4}',
            '[\\xFC-\\xFD][\\x80-\\xBF]{5}'
        );
        goto state2;
    }
    goto error;

    // Second-to-last continuation byte (sequences of 3 bytes or more).
    state4:
    if ($context->getBuffer()->isEnd()) {
        goto error;
    }
    $char = $context->getBuffer()->getSymbol();
    if (0x80 <= $char && $char <= 0xBF) {
        $context->getBuffer()->nextSymbol();
        $context->allowRegExps(
            '[\\xE0-\\xEF][\\x80-\\xBF]{2}',
            '[\\xF0-\\xF7][\\x80-\\xBF]{3}',
            '[\\xF8-\\xFB][\\x80-\\xBF]{4}',
            '[\\xFC-\\xFD][\\x80-\\xBF]{5}'
        );
        goto state3;
    }
    goto error;

    // Third-to-last continuation byte (sequences of 4 bytes or more).
    state5:
    if ($context->getBuffer()->isEnd()) {
        goto error;
    }
    $char = $context->getBuffer()->getSymbol();
    if (0x80 <= $char && $char <= 0xBF) {
        $context->getBuffer()->nextSymbol();
        $context->allowRegExps(
            '[\\xF0-\\xF7][\\x80-\\xBF]{3}',
            '[\\xF8-\\xFB][\\x80-\\xBF]{4}',
            '[\\xFC-\\xFD][\\x80-\\xBF]{5}'
        );
        goto state4;
    }
    goto error;

    // Fourth-to-last continuation byte (sequences of 5 bytes or more).
    state6:
    if ($context->getBuffer()->isEnd()) {
        goto error;
    }
    $char = $context->getBuffer()->getSymbol();
    if (0x80 <= $char && $char <= 0xBF) {
        $context->getBuffer()->nextSymbol();
        $context->allowRegExps(
            '[\\xF8-\\xFB][\\x80-\\xBF]{4}',
            '[\\xFC-\\xFD][\\x80-\\xBF]{5}'
        );
        goto state5;
    }
    goto error;

    // First continuation byte of a 6-byte sequence.
    state7:
    if ($context->getBuffer()->isEnd()) {
        goto error;
    }
    $char = $context->getBuffer()->getSymbol();
    if (0x80 <= $char && $char <= 0xBF) {
        $context->getBuffer()->nextSymbol();
        $context->allowRegExps('[\\xFC-\\xFD][\\x80-\\xBF]{5}');
        goto state6;
    }
    goto error;

    // Failure: nothing left to read means no token; otherwise skip the offending
    // symbol and report the consumed input as invalid bytes.
    error:
    if ($context->getBuffer()->isEnd()) {
        return false;
    }
    $context->getBuffer()->nextSymbol();
    $context->setNewToken(TokenType::INVALID_BYTES);
    return true;
}
||
| 231 |