Total Complexity | 48 |
Total Lines | 311 |
Duplicated Lines | 0 % |
Coverage | 98.47% |
Changes | 3 | ||
Bugs | 0 | Features | 0 |
Complex classes like MarkdownParser often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields and methods that share the same prefix or suffix.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use MarkdownParser, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
38 | final class MarkdownParser implements MarkdownParserInterface |
||
39 | { |
||
40 | /** |
||
41 | * @var EnvironmentInterface |
||
42 | * |
||
43 | * @psalm-readonly |
||
44 | */ |
||
45 | private $environment; |
||
46 | |||
47 | /** |
||
48 | * @var int |
||
49 | * |
||
50 | * @psalm-readonly-allow-private-mutation |
||
51 | */ |
||
52 | private $maxNestingLevel; |
||
53 | |||
54 | /** |
||
55 | * @var ReferenceMap |
||
56 | * |
||
57 | * @psalm-readonly-allow-private-mutation |
||
58 | */ |
||
59 | private $referenceMap; |
||
60 | |||
61 | /** |
||
62 | * @var int |
||
63 | * |
||
64 | * @psalm-readonly-allow-private-mutation |
||
65 | */ |
||
66 | private $lineNumber = 0; |
||
67 | |||
68 | /** |
||
69 | * @var Cursor |
||
70 | * |
||
71 | * @psalm-readonly-allow-private-mutation |
||
72 | */ |
||
73 | private $cursor; |
||
74 | |||
75 | /** |
||
76 | * @var array<int, BlockContinueParserInterface> |
||
77 | * |
||
78 | * @psalm-readonly-allow-private-mutation |
||
79 | */ |
||
80 | private $activeBlockParsers = []; |
||
81 | |||
82 | /** |
||
83 | * @var array<int, BlockContinueParserInterface> |
||
84 | * |
||
85 | * @psalm-readonly-allow-private-mutation |
||
86 | */ |
||
87 | private $closedBlockParsers = []; |
||
88 | |||
/**
 * Creates a parser bound to the given environment.
 *
 * The environment is stored as-is. This class later reads configuration and block start parsers from it and
 * dispatches the pre-parsed / parsed document events through it.
 */
public function __construct(EnvironmentInterface $environment)
{
    $this->environment = $environment;
}
|
93 | |||
/**
 * Resets all per-parse state. Called at the very start of parse().
 */
private function initialize(): void
{
    // Start from empty parser stacks, line zero and a fresh reference map.
    $this->closedBlockParsers = [];
    $this->activeBlockParsers = [];
    $this->lineNumber         = 0;
    $this->referenceMap       = new ReferenceMap();

    // The nesting limit is re-read from the environment's configuration on every parse.
    $configuration         = $this->environment->getConfiguration();
    $this->maxNestingLevel = $configuration->get('max_nesting_level');
}
|
103 | |||
/**
 * Parses the given Markdown text into a Document.
 *
 * Steps: reset state, activate a document-level block parser, dispatch a DocumentPreParsedEvent (the input
 * actually parsed is whatever that event holds after dispatch), feed every line to incorporateLine(), close
 * all block parsers still open, parse inlines, then dispatch a DocumentParsedEvent.
 *
 * @throws \RuntimeException
 */
public function parse(string $input): Document
{
    $this->initialize();

    $rootParser = new DocumentBlockParser($this->referenceMap);
    $this->activateBlockParser($rootParser);

    // Build the event from the (still empty) document and the wrapped raw input.
    $document  = $rootParser->getBlock();
    $rawInput  = new MarkdownInput($input);
    $preParsed = new DocumentPreParsedEvent($document, $rawInput);
    $this->environment->dispatch($preParsed);

    // Read the input back from the event rather than reusing $rawInput.
    $markdown = $preParsed->getMarkdown();

    foreach ($markdown->getLines() as $number => $text) {
        $this->lineNumber = $number;
        $this->incorporateLine($text);
    }

    // finalizeAndProcess
    $stillOpen = \count($this->activeBlockParsers);
    $this->closeBlockParsers($stillOpen, $this->lineNumber);
    $this->processInlines();

    $parsedEvent = new DocumentParsedEvent($rootParser->getBlock());
    $this->environment->dispatch($parsedEvent);

    return $rootParser->getBlock();
}
||
131 | |||
/**
 * Analyze a line of text and update the document appropriately. We parse markdown text by calling this on each
 * line of input, then finalizing the document.
 *
 * The work happens in three phases:
 *  1. Ask each already-active block parser (skipping index 0) whether it continues on this line.
 *  2. Starting from the last parser that matched, try to open new blocks at the cursor.
 *  3. Hand the remaining text to the appropriate parser (lazy continuation, existing leaf, or a new paragraph).
 */
private function incorporateLine(string $line): void
{
    $this->cursor = new Cursor($line);

    // The active stack cannot change while we probe it: the only mutation inside the loop (closeBlockParsers)
    // is immediately followed by a return. Count it once instead of on every iteration.
    $activeCount = \count($this->activeBlockParsers);

    $matches = 1;
    for ($i = 1; $i < $activeCount; $i++) {
        $blockParser   = $this->activeBlockParsers[$i];
        $blockContinue = $blockParser->tryContinue(clone $this->cursor, $this->getActiveBlockParser());
        if ($blockContinue === null) {
            break;
        }

        if ($blockContinue->isFinalize()) {
            // Close this parser and everything nested above it; the line is fully consumed.
            $this->closeBlockParsers($activeCount - $i, $this->lineNumber);

            return;
        }

        if (($state = $blockContinue->getCursorState()) !== null) {
            $this->cursor->restoreState($state);
        }

        $matches++;
    }

    $unmatchedBlocks = $activeCount - $matches;
    $blockParser     = $this->activeBlockParsers[$matches - 1];
    $startedNewBlock = false;

    // Unless last matched container is a code block, try new container starts
    $tryBlockStarts = $blockParser->getBlock() instanceof Paragraph || $blockParser->isContainer();
    while ($tryBlockStarts) {
        // this is a little performance optimization
        if ($this->cursor->isBlank()) {
            $this->cursor->advanceToEnd();
            break;
        }

        if (! $this->cursor->isIndented() && RegexHelper::isLetter($this->cursor->getNextNonSpaceCharacter())) {
            $this->cursor->advanceToNextNonSpaceOrTab();
            break;
        }

        // Stop opening new blocks once the configured nesting limit is reached.
        if ($blockParser->getBlock()->getDepth() >= $this->maxNestingLevel) {
            break;
        }

        $blockStart = $this->findBlockStart($blockParser);
        if ($blockStart === null) {
            $this->cursor->advanceToNextNonSpaceOrTab();
            break;
        }

        if (($state = $blockStart->getCursorState()) !== null) {
            $this->cursor->restoreState($state);
        }

        $startedNewBlock = true;

        // We're starting a new block. If we have any previous blocks that need to be closed, we need to do it now.
        if ($unmatchedBlocks > 0) {
            $this->closeBlockParsers($unmatchedBlocks, $this->lineNumber - 1);
            $unmatchedBlocks = 0;
        }

        if ($blockStart->isReplaceActiveBlockParser()) {
            $this->prepareActiveBlockParserForReplacement();
        }

        foreach ($blockStart->getBlockParsers() as $newBlockParser) {
            $blockParser    = $this->addChild($newBlockParser);
            $tryBlockStarts = $newBlockParser->isContainer();
        }
    }

    // What remains at the offset is a text line. Add the text to the appropriate block.

    // First check for a lazy paragraph continuation:
    if (! $startedNewBlock && ! $this->cursor->isBlank() && $this->getActiveBlockParser()->canHaveLazyContinuationLines()) {
        $this->getActiveBlockParser()->addLine($this->cursor->getRemainder());
    } else {
        // finalize any blocks not matched
        if ($unmatchedBlocks > 0) {
            $this->closeBlockParsers($unmatchedBlocks, $this->lineNumber);
        }

        if (! $blockParser->isContainer()) {
            $this->getActiveBlockParser()->addLine($this->cursor->getRemainder());
        } elseif (! $this->cursor->isBlank()) {
            $this->addChild(new ParagraphParser());
            $this->getActiveBlockParser()->addLine($this->cursor->getRemainder());
        }
    }
}
|
230 | |||
/**
 * Asks each block start parser provided by the environment, in iteration order, whether a new block begins at
 * the current cursor position, and returns the first non-null answer.
 *
 * Every candidate is given its own clone of the cursor.
 *
 * @return BlockStart|null null when no block start parser reports a start
 */
private function findBlockStart(BlockContinueParserInterface $lastMatchedBlockParser): ?BlockStart
{
    $parserState = new MarkdownParserState($this->getActiveBlockParser(), $lastMatchedBlockParser);

    foreach ($this->environment->getBlockStartParsers() as $candidate) {
        \assert($candidate instanceof BlockStartParserInterface);

        $blockStart = $candidate->tryStart(clone $this->cursor, $parserState);
        if ($blockStart === null) {
            continue;
        }

        return $blockStart;
    }

    return null;
}
||
244 | |||
/**
 * Pops $count parsers off the active stack, most recently activated first. Each one is finalized with
 * $endLineNumber and recorded so processInlines() can visit it later.
 */
private function closeBlockParsers(int $count, int $endLineNumber): void
{
    for ($remaining = $count; $remaining > 0; $remaining--) {
        $closing = $this->deactivateBlockParser();
        $this->finalize($closing, $endLineNumber);

        // Remember for inline parsing
        $this->closedBlockParsers[] = $closing;
    }
}
|
254 | |||
/**
 * Finalizes a single block parser.
 *
 * References collected by a paragraph parser are merged into the reference map first; then the block's end
 * line is recorded and the parser is told to close its block.
 */
private function finalize(BlockContinueParserInterface $blockParser, int $endLineNumber): void
{
    if ($blockParser instanceof ParagraphParser) {
        $references = $blockParser->getReferences();
        $this->updateReferenceMap($references);
    }

    $block = $blockParser->getBlock();
    $block->setEndLine($endLineNumber);

    $blockParser->closeBlock();
}
|
269 | |||
/**
 * Runs inline parsing over every closed block parser that supports it, in the order the parsers were closed.
 *
 * A single InlineParserEngine, built from the environment and the current reference map, is shared by all of them.
 */
private function processInlines(): void
{
    $inlineEngine = new InlineParserEngine($this->environment, $this->referenceMap);

    foreach ($this->closedBlockParsers as $closedParser) {
        // Parsers that do not implement the inline-aware interface are skipped.
        if (! ($closedParser instanceof BlockContinueParserWithInlinesInterface)) {
            continue;
        }

        $closedParser->parseInlines($inlineEngine);
    }
}
|
283 | |||
/**
 * Attaches the given parser's block beneath the innermost active parser that can contain it, then makes the
 * given parser the active one.
 *
 * Active parsers that cannot contain the new block are closed one at a time (ending on the previous line)
 * until one that can is found.
 *
 * @return BlockContinueParserInterface the same parser that was passed in
 */
private function addChild(BlockContinueParserInterface $blockParser): BlockContinueParserInterface
{
    $blockParser->getBlock()->setStartLine($this->lineNumber);

    while (true) {
        $accepted = $this->getActiveBlockParser()->canContain($blockParser->getBlock());
        if ($accepted) {
            break;
        }

        $this->closeBlockParsers(1, $this->lineNumber - 1);
    }

    $parent = $this->getActiveBlockParser();
    $parent->getBlock()->appendChild($blockParser->getBlock());
    $this->activateBlockParser($blockParser);

    return $blockParser;
}
||
301 | |||
/**
 * Pushes the given parser onto the end of the active parser stack.
 */
private function activateBlockParser(BlockContinueParserInterface $blockParser): void
{
    \array_push($this->activeBlockParsers, $blockParser);
}
|
306 | |||
/**
 * Removes and returns the most recently activated block parser.
 *
 * @throws \RuntimeException when the active parser stack has nothing left to pop
 */
private function deactivateBlockParser(): BlockContinueParserInterface
{
    $top = \array_pop($this->activeBlockParsers);
    if ($top !== null) {
        return $top;
    }

    throw new \RuntimeException('The last block parser should not be deactivated');
}
||
316 | |||
/**
 * Removes the active block parser ahead of it being replaced by a newly started block.
 *
 * References gathered by a paragraph parser are kept in the reference map, and the replaced block is
 * detached from the tree.
 */
private function prepareActiveBlockParserForReplacement(): void
{
    // Note that we don't want to parse inlines or finalize this block, as it's getting replaced.
    $replaced = $this->deactivateBlockParser();

    if ($replaced instanceof ParagraphParser) {
        $references = $replaced->getReferences();
        $this->updateReferenceMap($references);
    }

    $replacedBlock = $replaced->getBlock();
    $replacedBlock->detach();
}
|
328 | |||
329 | /** |
||
330 | * @param ReferenceInterface[] $references |
||
331 | */ |
||
332 | 2604 | private function updateReferenceMap(iterable $references): void |
|
337 | } |
||
338 | } |
||
339 | 2604 | } |
|
340 | |||
341 | 2982 | public function getActiveBlockParser(): BlockContinueParserInterface |
|
349 | } |
||
350 | } |
||
351 |
If the size of the collection does not change during the iteration, it is generally good practice to compute it once beforehand rather than on each iteration: