Complex classes like MarkdownParser often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields and methods that share the same prefixes or suffixes. You can also look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use MarkdownParser, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 34 | final class MarkdownParser implements MarkdownParserInterface |
||
| 35 | { |
||
| 36 | /** |
||
| 37 | * @var EnvironmentInterface |
||
| 38 | * |
||
| 39 | * @psalm-readonly |
||
| 40 | */ |
||
| 41 | private $environment; |
||
| 42 | |||
| 43 | /** |
||
| 44 | * @var int|float |
||
| 45 | * |
||
| 46 | * @psalm-readonly |
||
| 47 | */ |
||
| 48 | private $maxNestingLevel; |
||
| 49 | |||
| 50 | /** |
||
| 51 | * @var ReferenceMap |
||
| 52 | * |
||
| 53 | * @psalm-readonly-allow-private-mutation |
||
| 54 | */ |
||
| 55 | private $referenceMap; |
||
| 56 | |||
| 57 | /** |
||
| 58 | * @var int |
||
| 59 | * |
||
| 60 | * @psalm-readonly-allow-private-mutation |
||
| 61 | */ |
||
| 62 | private $lineNumber = 0; |
||
| 63 | |||
| 64 | /** |
||
| 65 | * @var Cursor |
||
| 66 | * |
||
| 67 | * @psalm-readonly-allow-private-mutation |
||
| 68 | */ |
||
| 69 | private $cursor; |
||
| 70 | |||
| 71 | /** |
||
| 72 | * @var array<int, BlockContinueParserInterface> |
||
| 73 | * |
||
| 74 | * @psalm-readonly-allow-private-mutation |
||
| 75 | */ |
||
| 76 | private $allBlockParsers = []; |
||
| 77 | |||
| 78 | /** |
||
| 79 | * @var array<int, BlockContinueParserInterface> |
||
| 80 | * |
||
| 81 | * @psalm-readonly-allow-private-mutation |
||
| 82 | */ |
||
| 83 | private $activeBlockParsers = []; |
||
| 84 | |||
| 85 | 2511 | public function __construct(EnvironmentInterface $environment) |
|
| 90 | |||
| 91 | 2502 | private function initialize(): void |
|
| 98 | |||
| 99 | /** |
||
| 100 | * @throws \RuntimeException |
||
| 101 | */ |
||
| 102 | 2502 | public function parse(string $input): Document |
|
| 125 | |||
| 126 | /** |
||
| 127 | * Analyze a line of text and update the document appropriately. We parse markdown text by calling this on each |
||
| 128 | * line of input, then finalizing the document. |
||
| 129 | */ |
||
| 130 | 2496 | private function incorporateLine(string $line): void |
|
| 131 | { |
||
| 132 | 2496 | $this->cursor = new Cursor($line); |
|
| 133 | |||
| 134 | 2496 | $matches = 1; |
|
| 135 | 2496 | foreach ($this->getActiveBlockParsers(1) as $blockParser) { |
|
| 136 | \assert($blockParser instanceof BlockContinueParserInterface); |
||
| 137 | 1302 | $blockContinue = $blockParser->tryContinue(clone $this->cursor, $this->getActiveBlockParser()); |
|
| 138 | 1302 | if ($blockContinue === null) { |
|
| 139 | 828 | break; |
|
| 140 | } |
||
| 141 | |||
| 142 | 900 | if ($blockContinue->isFinalize()) { |
|
| 143 | 87 | $this->finalizeAndClose($blockParser, $this->lineNumber); |
|
| 144 | |||
| 145 | 87 | return; |
|
| 146 | } |
||
| 147 | |||
| 148 | 894 | if (($state = $blockContinue->getCursorState()) !== null) { |
|
| 149 | 894 | $this->cursor->restoreState($state); |
|
|
|
|||
| 150 | } |
||
| 151 | |||
| 152 | 894 | $matches++; |
|
| 153 | } |
||
| 154 | |||
| 155 | 2496 | $unmatchedBlockParsers = $this->getActiveBlockParsers($matches); |
|
| 156 | 2496 | $lastMatchedBlockParser = $this->getActiveBlockParsers()[$matches - 1]; |
|
| 157 | 2496 | $blockParser = $lastMatchedBlockParser; |
|
| 158 | 2496 | $allClosed = empty($unmatchedBlockParsers); |
|
| 159 | |||
| 160 | // Unless last matched container is a code block, try new container starts |
||
| 161 | 2496 | $tryBlockStarts = $blockParser->getBlock() instanceof Paragraph || $blockParser->isContainer(); |
|
| 162 | 2496 | while ($tryBlockStarts) { |
|
| 163 | // this is a little performance optimization |
||
| 164 | 2496 | if ($this->cursor->isBlank()) { |
|
| 165 | 630 | $this->cursor->advanceToEnd(); |
|
| 166 | 630 | break; |
|
| 167 | } |
||
| 168 | |||
| 169 | 2496 | if (! $this->cursor->isIndented() && RegexHelper::isLetter($this->cursor->getNextNonSpaceCharacter())) { |
|
| 170 | 1023 | $this->cursor->advanceToNextNonSpaceOrTab(); |
|
| 171 | 1023 | break; |
|
| 172 | } |
||
| 173 | |||
| 174 | 2112 | if ($blockParser->getBlock()->getDepth() >= $this->maxNestingLevel) { |
|
| 175 | 3 | break; |
|
| 176 | } |
||
| 177 | |||
| 178 | 2112 | $blockStart = $this->findBlockStart($blockParser); |
|
| 179 | 2112 | if ($blockStart === null) { |
|
| 180 | 1299 | $this->cursor->advanceToNextNonSpaceOrTab(); |
|
| 181 | 1299 | break; |
|
| 182 | } |
||
| 183 | |||
| 184 | 954 | if (($state = $blockStart->getCursorState()) !== null) { |
|
| 185 | 954 | $this->cursor->restoreState($state); |
|
| 186 | } |
||
| 187 | |||
| 188 | 954 | if (! $allClosed) { |
|
| 189 | 210 | $this->finalizeBlocks($unmatchedBlockParsers, $this->lineNumber - 1); |
|
| 190 | 210 | $allClosed = true; |
|
| 191 | } |
||
| 192 | |||
| 193 | 954 | if ($blockStart->isReplaceActiveBlockParser()) { |
|
| 194 | 132 | $this->prepareActiveBlockParserForReplacement(); |
|
| 195 | } |
||
| 196 | |||
| 197 | 954 | foreach ($blockStart->getBlockParsers() as $newBlockParser) { |
|
| 198 | 954 | $blockParser = $this->addChild($newBlockParser); |
|
| 199 | 954 | $tryBlockStarts = $newBlockParser->isContainer(); |
|
| 200 | } |
||
| 201 | } |
||
| 202 | |||
| 203 | // What remains at the offset is a text line. Add the text to the appropriate block. |
||
| 204 | |||
| 205 | // First check for a lazy paragraph continuation: |
||
| 206 | 2496 | if (! $allClosed && ! $this->cursor->isBlank() && $this->getActiveBlockParser()->canHaveLazyContinuationLines()) { |
|
| 207 | 48 | $this->getActiveBlockParser()->addLine($this->cursor->getRemainder()); |
|
| 208 | } else { |
||
| 209 | // finalize any blocks not matched |
||
| 210 | 2496 | if (! $allClosed) { |
|
| 211 | 669 | $this->finalizeBlocks($unmatchedBlockParsers, $this->lineNumber); |
|
| 212 | } |
||
| 213 | |||
| 214 | 2496 | if (! $blockParser->isContainer()) { |
|
| 215 | 981 | $this->getActiveBlockParser()->addLine($this->cursor->getRemainder()); |
|
| 216 | 2181 | } elseif (! $this->cursor->isBlank()) { |
|
| 217 | 2133 | $this->addChild(new ParagraphParser()); |
|
| 218 | 2133 | $this->getActiveBlockParser()->addLine($this->cursor->getRemainder()); |
|
| 219 | } |
||
| 220 | } |
||
| 221 | 2496 | } |
|
| 222 | |||
| 223 | 2112 | private function findBlockStart(BlockContinueParserInterface $lastMatchedBlockParser): ?BlockStart |
|
| 224 | { |
||
| 225 | 2112 | $matchedBlockParser = new MarkdownParserState($this->getActiveBlockParser(), $lastMatchedBlockParser); |
|
| 226 | |||
| 227 | 2112 | foreach ($this->environment->getBlockStartParsers() as $blockStartParser) { |
|
| 228 | \assert($blockStartParser instanceof BlockStartParserInterface); |
||
| 229 | 2109 | if (($result = $blockStartParser->tryStart(clone $this->cursor, $matchedBlockParser)) !== null) { |
|
| 230 | 954 | return $result; |
|
| 231 | } |
||
| 232 | } |
||
| 233 | |||
| 234 | 1299 | return null; |
|
| 235 | } |
||
| 236 | |||
| 237 | /** |
||
| 238 | * @param array<int, BlockContinueParserInterface> $blockParsers |
||
| 239 | */ |
||
| 240 | 2496 | private function finalizeBlocks(array $blockParsers, int $endLineNumber): void |
|
| 246 | |||
| 247 | /** |
||
| 248 | * Finalize a block. Close it and do any necessary postprocessing, e.g. creating string_content from strings, |
||
| 249 | * setting the 'tight' or 'loose' status of a list, and parsing the beginnings of paragraphs for reference |
||
| 250 | * definitions. |
||
| 251 | */ |
||
| 252 | 2496 | private function finalizeAndClose(BlockContinueParserInterface $blockParser, int $endLineNumber): void |
|
| 265 | |||
| 266 | /** |
||
| 267 | * Walk through a block & children recursively, parsing string content into inline content where appropriate. |
||
| 268 | */ |
||
| 269 | 2496 | private function processInlines(): void |
|
| 270 | { |
||
| 271 | 2496 | $p = new InlineParserEngine($this->environment, $this->referenceMap); |
|
| 272 | |||
| 273 | 2496 | foreach ($this->allBlockParsers as $blockParser) { |
|
| 274 | \assert($blockParser instanceof BlockContinueParserInterface); |
||
| 275 | 2496 | $blockParser->parseInlines($p); |
|
| 276 | } |
||
| 277 | 2496 | } |
|
| 278 | |||
| 279 | /** |
||
| 280 | * Add block of type tag as a child of the tip. If the tip can't accept children, close and finalize it and try |
||
| 281 | * its parent, and so on until we find a block that can accept children. |
||
| 282 | */ |
||
| 283 | 2496 | private function addChild(BlockContinueParserInterface $blockParser): BlockContinueParserInterface |
|
| 296 | |||
| 297 | 2502 | private function activateBlockParser(BlockContinueParserInterface $blockParser): void |
|
| 302 | |||
| 303 | 2496 | private function deactivateBlockParser(): BlockContinueParserInterface |
|
| 312 | |||
| 313 | 132 | private function prepareActiveBlockParserForReplacement(): void |
|
| 325 | |||
| 326 | /** |
||
| 327 | * @param ReferenceInterface[] $references |
||
| 328 | */ |
||
| 329 | 2133 | private function updateReferenceMap(iterable $references): void |
|
| 337 | |||
| 338 | /** |
||
| 339 | * @return array<int, BlockContinueParserInterface> |
||
| 340 | */ |
||
| 341 | 2496 | private function getActiveBlockParsers(?int $offset = 0): array |
|
| 349 | |||
| 350 | 2496 | public function getActiveBlockParser(): BlockContinueParserInterface |
|
| 359 | } |
||
| 360 |
PHP Analyzer performs a side-effects analysis of your code. A side-effect is basically anything that might be visible after the scope of the method is left.
Let’s take a look at an example:
If we look at the
getEmail() method, we can see that it has no side-effect. Whether you call this method or not, no future calls to other methods are affected by this. As such, code like the following is useless: On the other hand, if we look at the
setEmail() method, it _has_ side-effects. In the following case, we could not remove the method call: