| Total Complexity | 43 |
| Total Lines | 301 |
| Duplicated Lines | 0 % |
| Changes | 0 | ||
Complex classes like ReferenceParser often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use ReferenceParser, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 22 | final class ReferenceParser |
||
| 23 | { |
||
| 24 | // Looking for the start of a definition, i.e. `[` |
||
| 25 | private const START_DEFINITION = 0; |
||
| 26 | // Looking for and parsing the label, i.e. `[foo]` within `[foo]` |
||
| 27 | private const LABEL = 1; |
||
| 28 | // Parsing the destination, i.e. `/url` in `[foo]: /url` |
||
| 29 | private const DESTINATION = 2; |
||
| 30 | // Looking for the start of a title, i.e. the first `"` in `[foo]: /url "title"` |
||
| 31 | private const START_TITLE = 3; |
||
| 32 | // Parsing the content of the title, i.e. `title` in `[foo]: /url "title"` |
||
| 33 | private const TITLE = 4; |
||
| 34 | // End state, no matter what kind of lines we add, they won't be references |
||
| 35 | private const PARAGRAPH = 5; |
||
| 36 | |||
| 37 | /** @psalm-readonly-allow-private-mutation */ |
||
| 38 | private string $paragraph = ''; |
||
| 39 | |||
| 40 | /** |
||
| 41 | * @var array<int, ReferenceInterface> |
||
| 42 | * |
||
| 43 | * @psalm-readonly-allow-private-mutation |
||
| 44 | */ |
||
| 45 | private array $references = []; |
||
| 46 | |||
| 47 | /** @psalm-readonly-allow-private-mutation */ |
||
| 48 | private int $state = self::START_DEFINITION; |
||
| 49 | |||
| 50 | /** @psalm-readonly-allow-private-mutation */ |
||
| 51 | private ?string $label = null; |
||
| 52 | |||
| 53 | /** @psalm-readonly-allow-private-mutation */ |
||
| 54 | private ?string $destination = null; |
||
| 55 | |||
| 56 | /** |
||
| 57 | * @var string |
||
| 58 | * |
||
| 59 | * @psalm-readonly-allow-private-mutation |
||
| 60 | */ |
||
| 61 | private string $title = ''; |
||
| 62 | |||
| 63 | /** @psalm-readonly-allow-private-mutation */ |
||
| 64 | private ?string $titleDelimiter = null; |
||
| 65 | |||
| 66 | /** @psalm-readonly-allow-private-mutation */ |
||
| 67 | private bool $referenceValid = false; |
||
| 68 | |||
/**
 * Returns the buffered paragraph text.
 *
 * Every line handed to parse() is appended to this buffer (separated by "\n"),
 * and the buffer is reset to an empty string when parseTitle() completes a
 * reference definition.
 */
public function getParagraphContent(): string
{
    return $this->paragraph;
}
||
| 73 | |||
/**
 * Returns the references collected so far.
 *
 * finishReference() is invoked first; presumably this flushes a definition
 * that is still pending into the list (its body is not shown here - confirm).
 *
 * @return ReferenceInterface[]
 */
public function getReferences(): iterable
{
    $this->finishReference();

    return $this->references;
}
||
| 83 | |||
/**
 * Tells whether at least one reference has been stored in the reference list.
 *
 * Unlike getReferences(), this does not call finishReference() beforehand.
 */
public function hasReferences(): bool
{
    return \count($this->references) > 0;
}
||
| 88 | |||
/**
 * Feeds one line of input into the reference-definition state machine.
 *
 * The line is always appended to the buffered paragraph text, separated from
 * any earlier content by "\n". It is then walked with a Cursor: on each pass
 * the step matching the current state is run, until the cursor reaches the end
 * of the line. As soon as a step reports failure (or the state is unknown) the
 * parser switches to the PARAGRAPH state and stops processing the line.
 *
 * @param string $line The line to process
 */
public function parse(string $line): void
{
    $this->paragraph = $this->paragraph === ''
        ? $line
        : $this->paragraph . "\n" . $line;

    $cursor = new Cursor($line);
    while (! $cursor->isAtEnd()) {
        $state = $this->state;

        if ($state === self::PARAGRAPH) {
            // Link reference definitions can only appear at the beginning, so once
            // we're in a paragraph there's no going back.
            return;
        }

        if ($state === self::START_DEFINITION) {
            $advanced = $this->parseStartDefinition($cursor);
        } elseif ($state === self::LABEL) {
            $advanced = $this->parseLabel($cursor);
        } elseif ($state === self::DESTINATION) {
            $advanced = $this->parseDestination($cursor);
        } elseif ($state === self::START_TITLE) {
            $advanced = $this->parseStartTitle($cursor);
        } elseif ($state === self::TITLE) {
            $advanced = $this->parseTitle($cursor);
        } else {
            // Unknown state - this should never happen; treated like a failed step
            $advanced = false;
        }

        if ($advanced) {
            continue;
        }

        // The step failed: everything from here on is ordinary paragraph text
        $this->state = self::PARAGRAPH;

        return;
    }
}
||
| 132 | |||
/**
 * Looks for the `[` that opens a reference definition.
 *
 * Leading spaces and tabs are skipped first. On success the parser moves to the
 * LABEL state with a fresh label; if the `[` was the last character on the
 * line, the label starts with a "\n".
 *
 * @return bool false when the next non-blank character is missing or is not `[`
 */
private function parseStartDefinition(Cursor $cursor): bool
{
    $cursor->advanceToNextNonSpaceOrTab();

    $opensLabel = ! $cursor->isAtEnd() && $cursor->getCurrentCharacter() === '[';
    if (! $opensLabel) {
        return false;
    }

    // Step over the opening bracket
    $cursor->advance();

    $this->state = self::LABEL;
    // Nothing after the bracket: the label continues on the next line
    $this->label = $cursor->isAtEnd() ? "\n" : '';

    return true;
}
||
| 150 | |||
| 151 | private function parseLabel(Cursor $cursor): bool |
||
| 198 | } |
||
| 199 | |||
| 200 | private function parseDestination(Cursor $cursor): bool |
||
| 225 | } |
||
| 226 | |||
/**
 * Looks for the character that opens a title after the destination.
 *
 * Leading spaces and tabs are skipped. If nothing else is left on the line the
 * parser goes back to START_DEFINITION. Otherwise `"` and `'` open a title that
 * closes with the same character, and `(` opens one that closes with `)`. Any
 * other character means there is no title: the reference is finished and the
 * same character is retried as the start of another definition.
 *
 * @return bool always true
 */
private function parseStartTitle(Cursor $cursor): bool
{
    $cursor->advanceToNextNonSpaceOrTab();
    if ($cursor->isAtEnd()) {
        $this->state = self::START_DEFINITION;

        return true;
    }

    // Work out which closing delimiter (if any) the current character implies
    $opener = $cursor->getCurrentCharacter();
    if ($opener === '"' || $opener === "'") {
        $closer = $opener;
    } elseif ($opener === '(') {
        $closer = ')';
    } else {
        $closer = null;
    }

    $this->titleDelimiter = $closer;

    if ($closer === null) {
        // No title delimiter found
        $this->finishReference();
        // There might be another reference instead, try that for the same character.
        $this->state = self::START_DEFINITION;

        return true;
    }

    $this->state = self::TITLE;
    $cursor->advance();
    if ($cursor->isAtEnd()) {
        // The opening delimiter ended the line; the title continues on the next one
        $this->title .= "\n";
    }

    return true;
}
||
| 264 | |||
/**
 * Consumes (part of) a title and, once it is closed, completes the reference.
 *
 * The piece returned by LinkParserHelper::parsePartialLinkTitle() is appended
 * to the title, minus a trailing closing delimiter if one is present. When the
 * delimiter has not been seen and the line is exhausted, a "\n" is appended and
 * parsing resumes on the next line. Otherwise only spaces or tabs may follow;
 * the reference is then marked valid and finished, the paragraph buffer is
 * cleared, and the parser returns to START_DEFINITION.
 *
 * @return bool false when the title is invalid or other characters follow it
 */
private function parseTitle(Cursor $cursor): bool
{
    \assert($this->titleDelimiter !== null);

    $chunk = LinkParserHelper::parsePartialLinkTitle($cursor, $this->titleDelimiter);
    if ($chunk === null) {
        // Invalid title, stop
        return false;
    }

    // A chunk ending in the delimiter closes the title; the delimiter itself is dropped
    $closed       = \substr($chunk, -1) === $this->titleDelimiter;
    $this->title .= $closed ? \substr($chunk, 0, -1) : $chunk;

    if (! $closed && $cursor->isAtEnd()) {
        // Title still going, continue on next line
        $this->title .= "\n";

        return true;
    }

    // We either hit the end delimiter or some extra whitespace
    $cursor->advanceToNextNonSpaceOrTab();
    if (! $cursor->isAtEnd()) {
        // spec: No further non-whitespace characters may occur on the line.
        return false;
    }

    $this->referenceValid = true;
    $this->finishReference();
    $this->paragraph = '';

    // See if there's another definition
    $this->state = self::START_DEFINITION;

    return true;
}
||
| 308 | |||
| 309 | private function finishReference(): void |
||
| 323 | } |
||
| 324 | } |
||
| 325 |