Complex classes like ReferenceParser often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields and methods that share the same prefixes or suffixes. You can also look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use ReferenceParser, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
22 | final class ReferenceParser |
||
23 | { |
||
24 | // Looking for the start of a definition, i.e. `[` |
||
25 | private const START_DEFINITION = 0; |
||
26 | // Looking for and parsing the label, i.e. `[foo]` within `[foo]` |
||
27 | private const LABEL = 1; |
||
28 | // Parsing the destination, i.e. `/url` in `[foo]: /url` |
||
29 | private const DESTINATION = 2; |
||
30 | // Looking for the start of a title, i.e. the first `"` in `[foo]: /url "title"` |
||
31 | private const START_TITLE = 3; |
||
32 | // Parsing the content of the title, i.e. `title` in `[foo]: /url "title"` |
||
33 | private const TITLE = 4; |
||
34 | // End state, no matter what kind of lines we add, they won't be references |
||
35 | private const PARAGRAPH = 5; |
||
36 | |||
37 | /** |
||
38 | * @var string |
||
39 | * |
||
40 | * @psalm-readonly-allow-private-mutation |
||
41 | */ |
||
42 | private $paragraph = ''; |
||
43 | |||
44 | /** |
||
45 | * @var array<int, ReferenceInterface> |
||
46 | * |
||
47 | * @psalm-readonly-allow-private-mutation |
||
48 | */ |
||
49 | private $references = []; |
||
50 | |||
51 | /** |
||
52 | * @var int |
||
53 | * |
||
54 | * @psalm-readonly-allow-private-mutation |
||
55 | */ |
||
56 | private $state = self::START_DEFINITION; |
||
57 | |||
58 | /** |
||
59 | * @var string|null |
||
60 | * |
||
61 | * @psalm-readonly-allow-private-mutation |
||
62 | */ |
||
63 | private $label; |
||
64 | |||
65 | /** |
||
66 | * @var string|null |
||
67 | * |
||
68 | * @psalm-readonly-allow-private-mutation |
||
69 | */ |
||
70 | private $destination; |
||
71 | |||
72 | /** |
||
73 | * @var string |
||
74 | * |
||
75 | * @psalm-readonly-allow-private-mutation |
||
76 | */ |
||
77 | private $title = ''; |
||
78 | |||
79 | /** |
||
80 | * @var string|null |
||
81 | * |
||
82 | * @psalm-readonly-allow-private-mutation |
||
83 | */ |
||
84 | private $titleDelimiter; |
||
85 | |||
86 | /** |
||
87 | * @var bool |
||
88 | * |
||
89 | * @psalm-readonly-allow-private-mutation |
||
90 | */ |
||
91 | private $referenceValid = false; |
||
92 | |||
93 | 2133 | public function getParagraphContent(): string |
|
97 | |||
98 | /** |
||
99 | * @return ReferenceInterface[] |
||
100 | */ |
||
101 | 2133 | public function getReferences(): iterable |
|
107 | |||
108 | 2031 | public function hasReferences(): bool |
|
112 | |||
113 | 2133 | public function parse(string $line): void |
|
156 | |||
157 | 2133 | private function parseStartDefinition(Cursor $cursor): bool |
|
174 | |||
/**
 * Consumes the next piece of a link label and, when the label is closed by
 * `]:` on this line, validates it and switches the parser to the
 * DESTINATION state.
 *
 * The accumulated text lives in $this->label; it must already be non-null
 * when this method is entered.
 *
 * @param Cursor $cursor Cursor positioned somewhere inside the label.
 *
 * @return bool True if the line was accepted (either the label continues on
 *              the next line, or it was completed and validated); false if
 *              the input cannot be a valid label.
 */
private function parseLabel(Cursor $cursor): bool
{
    $cursor->advanceToNextNonSpaceOrTab();

    $chunk = LinkParserHelper::parsePartialLinkLabel($cursor);
    if ($chunk === null) {
        return false;
    }

    \assert($this->label !== null);
    $this->label .= $chunk;

    // Ran out of input: keep the line break and wait for the next line,
    // since the label may span several lines.
    if ($cursor->isAtEnd()) {
        $this->label .= "\n";

        return true;
    }

    // The label has to be terminated by `]` immediately followed by `:`.
    foreach ([']', ':'] as $expected) {
        if ($cursor->getCharacter() !== $expected) {
            return false;
        }

        $cursor->advance();
    }

    // spec: A link label can have at most 999 characters inside the square brackets
    $exceedsMaxLength = \mb_strlen($this->label, 'utf-8') > 999;
    // spec: A link label must contain at least one non-whitespace character
    $isBlank = \trim($this->label) === '';

    if ($exceedsMaxLength || $isBlank) {
        return false;
    }

    $cursor->advanceToNextNonSpaceOrTab();
    $this->state = self::DESTINATION;

    return true;
}
||
223 | |||
224 | 246 | private function parseDestination(Cursor $cursor): bool |
|
250 | |||
251 | 135 | private function parseStartTitle(Cursor $cursor): bool |
|
288 | |||
/**
 * Consumes the next piece of a link title delimited by $this->titleDelimiter.
 *
 * When the closing delimiter is found and nothing but spaces/tabs follows it,
 * the reference is flagged as valid, finishReference() is invoked, the
 * buffered paragraph text is cleared and the parser returns to the
 * START_DEFINITION state.
 *
 * @param Cursor $cursor Cursor positioned inside the title text.
 *
 * @return bool True if the line was accepted (title continues on the next
 *              line, or the reference was completed); false if the title is
 *              invalid or is followed by extra non-whitespace characters.
 */
private function parseTitle(Cursor $cursor): bool
{
    \assert($this->titleDelimiter !== null);

    $chunk = LinkParserHelper::parsePartialLinkTitle($cursor, $this->titleDelimiter);
    if ($chunk === null) {
        // Invalid title, stop
        return false;
    }

    // A chunk ending in the delimiter closes the title; the delimiter itself
    // is not part of the stored title, so drop it.
    $closed = \substr($chunk, -1) === $this->titleDelimiter;
    $this->title .= $closed ? \substr($chunk, 0, -1) : $chunk;

    if (! $closed && $cursor->isAtEnd()) {
        // Title still going, continue on next line
        $this->title .= "\n";

        return true;
    }

    // We either hit the end delimiter or some extra whitespace
    $cursor->advanceToNextNonSpaceOrTab();

    // spec: No further non-whitespace characters may occur on the line.
    if (! $cursor->isAtEnd()) {
        return false;
    }

    $this->referenceValid = true;
    $this->finishReference();
    $this->paragraph = '';

    // See if there's another definition
    $this->state = self::START_DEFINITION;

    return true;
}
||
332 | |||
333 | 2133 | private function finishReference(): void |
|
348 | } |
||
349 |