Total Complexity | 151 |
Total Lines | 925 |
Duplicated Lines | 0 % |
Changes | 1 | ||
Bugs | 0 | Features | 0 |
Complex classes like Parsedown often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Parsedown, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
19 | class Parsedown |
||
20 | { |
||
21 | # |
||
22 | # Multiton (http://en.wikipedia.org/wiki/Multiton_pattern) |
||
23 | # |
||
24 | |||
25 | /** |
||
26 | * @param string $name |
||
27 | * @return mixed|Parsedown |
||
28 | */ |
||
29 | public static function instance($name = 'default') |
||
40 | } |
||
41 | |||
42 | private static $instances = []; |
||
43 | |||
44 | # |
||
45 | # Setters |
||
46 | # |
||
47 | |||
48 | private $break_marker = " \n"; |
||
49 | |||
50 | /** |
||
51 | * @param $breaks_enabled |
||
52 | * @return $this |
||
53 | */ |
||
54 | public function set_breaks_enabled($breaks_enabled) |
||
59 | } |
||
60 | |||
61 | # |
||
62 | # Fields |
||
63 | # |
||
64 | |||
65 | private $reference_map = []; |
||
66 | private $escape_sequence_map = []; |
||
67 | |||
68 | # |
||
69 | # Public Methods |
||
70 | # |
||
71 | |||
72 | /** |
||
73 | * @param $text |
||
74 | * @return mixed|string |
||
75 | */ |
||
76 | public function parse($text) |
||
120 | } |
||
121 | |||
122 | # |
||
123 | # Private Methods |
||
124 | # |
||
125 | |||
/**
 * Converts Markdown lines into block-level HTML markup.
 *
 * Runs in two passes. The first pass walks the lines and groups them into
 * block elements (paragraph, heading, list item, blockquote, code block,
 * fenced block, raw block-level markup, self-closing tag, hr) and records
 * link reference definitions in $this->reference_map. The second pass
 * renders the collected elements, recursing into blockquotes and list items.
 *
 * @param array  $lines   Lines of Markdown text to parse.
 * @param string $context Pass 'li' when the lines are the content of a list
 *                        item; a leading paragraph is then emitted without
 *                        <p> tags unless it is followed by a blank line.
 * @return string The generated markup.
 */
private function parse_block_elements(array $lines, $context = '')
{
    $elements = [];

    # Placeholder for "no element open yet". It is always the first entry
    # pushed onto $elements and is discarded before rendering.
    $element = [
        'type' => '',
    ];

    foreach ($lines as $line) {
        # fenced elements: while still open they consume every line verbatim

        switch ($element['type']) {
            case 'fenced block':

                if (!isset($element['closed'])) {
                    # A closing fence is three or more of the character that opened the block.
                    if (preg_match('/^[ ]*' . $element['fence'][0] . '{3,}[ ]*$/', $line)) {
                        $element['closed'] = true;
                    } else {
                        if ('' !== $element['text']) {
                            $element['text'] .= "\n";
                        }

                        $element['text'] .= $line;
                    }

                    continue 2;
                }

                break;
            case 'block-level markup':

                if (!isset($element['closed'])) {
                    # Track nesting of the same tag so that an inner closing
                    # tag does not terminate the outer element.
                    if (false !== mb_strpos($line, $element['start'])) { # opening tag
                        $element['depth']++;
                    }

                    if (false !== mb_strpos($line, $element['end'])) { # closing tag
                        if ($element['depth'] > 0) {
                            $element['depth']--;
                        } else {
                            $element['closed'] = true;
                        }
                    }

                    $element['text'] .= "\n" . $line;

                    continue 2;
                }

                break;
        }

        # blank lines only mark the current element as interrupted

        $deindented_line = ltrim($line);

        if ('' === $deindented_line) {
            $element['interrupted'] = true;

            continue;
        }

        # composite elements: their lines are collected and parsed recursively later

        switch ($element['type']) {
            case 'blockquote':

                if (!isset($element['interrupted'])) {
                    $line = preg_replace('/^[ ]*>[ ]?/', '', $line);

                    $element['lines'] [] = $line;

                    continue 2;
                }

                break;
            case 'li':

                if (preg_match('/^([ ]{0,3})(\d+[.]|[*+-])[ ](.*)/', $line, $matches)) {
                    if ($element['indentation'] !== $matches[1]) {
                        # Different indentation: the marker belongs to a nested list.
                        $element['lines'] [] = $line;
                    } else {
                        # Same indentation: close the current item and open a sibling.
                        unset($element['last']);

                        $elements [] = $element;

                        $element = [
                            'type' => 'li',
                            'indentation' => $matches[1],
                            'last' => true,
                            'lines' => [
                                preg_replace('/^[ ]{0,4}/', '', $matches[3]),
                            ],
                        ];
                    }

                    continue 2;
                }

                if (isset($element['interrupted'])) {
                    # After a blank line only an indented line continues the item.
                    if (' ' === $line[0]) {
                        $element['lines'] [] = '';

                        $line = preg_replace('/^[ ]{0,4}/', '', $line);

                        $element['lines'] [] = $line;

                        unset($element['interrupted']);

                        continue 2;
                    }
                } else {
                    $line = preg_replace('/^[ ]{0,4}/', '', $line);

                    $element['lines'] [] = $line;

                    continue 2;
                }

                break;
        }

        # indentation sensitive types

        switch ($line[0]) {
            case ' ':

                # code block (line starts with four spaces)

                if (isset($line[3]) and ' ' === $line[3] and ' ' === $line[2] and ' ' === $line[1]) {
                    $code_line = mb_substr($line, 4);

                    if ('code block' === $element['type']) {
                        if (isset($element['interrupted'])) {
                            # Keep the blank line that separated the two code lines.
                            $element['text'] .= "\n";

                            unset($element['interrupted']);
                        }

                        $element['text'] .= "\n" . $code_line;
                    } else {
                        $elements [] = $element;

                        $element = [
                            'type' => 'code block',
                            'text' => $code_line,
                        ];
                    }

                    continue 2;
                }

                break;
            case '#':

                # atx heading (#)

                if (isset($line[1])) {
                    $elements [] = $element;

                    $level = 1;

                    while (isset($line[$level]) and '#' === $line[$level]) {
                        ++$level;
                    }

                    $element = [
                        'type' => 'heading',
                        'text' => trim($line, '# '),
                        'level' => $level,
                    ];

                    continue 2;
                }

                break;
            case '-':
            case '=':

                # setext heading: a line made only of "-" or "=" directly below a paragraph

                if ('paragraph' === $element['type'] and false === isset($element['interrupted'])) {
                    $chopped_line = rtrim($line);

                    $i = 1;

                    while (isset($chopped_line[$i])) {
                        if ($chopped_line[$i] !== $line[0]) {
                            # Not an underline: leave both the loop and this switch.
                            break 2;
                        }

                        ++$i;
                    }

                    $element['type'] = 'heading';
                    $element['level'] = '-' === $line[0] ? 2 : 1;

                    continue 2;
                }

                break;
        }

        # indentation insensitive types

        switch ($deindented_line[0]) {
            case '<':

                $position = mb_strpos($deindented_line, '>');

                if (false !== $position and $position > 1) { # tag
                    $name = mb_substr($deindented_line, 1, $position - 1);
                    $name = rtrim($name);

                    # Evaluated afresh for every line. A flag that outlives the
                    # early "break"s below would make a later block-level tag
                    # look self-closing.
                    $is_self_closing = '/' === mb_substr($name, -1);

                    if ($is_self_closing) {
                        $name = mb_substr($name, 0, -1);
                    }

                    # Drop attributes: keep only the tag name.
                    $position = mb_strpos($name, ' ');

                    if ($position) {
                        $name = mb_substr($name, 0, $position);
                    }

                    if (!ctype_alpha($name)) {
                        break;
                    }

                    # Inline tags are left to the paragraph / span handling.
                    if (in_array($name, $this->inline_tags, true)) {
                        break;
                    }

                    $elements [] = $element;

                    if ($is_self_closing) {
                        $element = [
                            'type' => 'self-closing tag',
                            'text' => $deindented_line,
                        ];

                        continue 2;
                    }

                    $element = [
                        'type' => 'block-level markup',
                        'text' => $deindented_line,
                        'start' => '<' . $name . '>',
                        'end' => '</' . $name . '>',
                        'depth' => 0,
                    ];

                    # The closing tag can never sit at offset 0 because the
                    # line starts with the opening tag, so a truthiness test is enough.
                    if (mb_strpos($deindented_line, $element['end'])) {
                        $element['closed'] = true;
                    }

                    continue 2;
                }

                break;
            case '>':

                # quote

                if (preg_match('/^>[ ]?(.*)/', $deindented_line, $matches)) {
                    $elements [] = $element;

                    $element = [
                        'type' => 'blockquote',
                        'lines' => [
                            $matches[1],
                        ],
                    ];

                    continue 2;
                }

                break;
            case '[':

                # reference definition: stored for later lookup, produces no element

                if (preg_match('/^\[(.+?)\]:[ ]*(.+?)(?:[ ]+[\'"](.+?)[\'"])?[ ]*$/', $deindented_line, $matches)) {
                    $label = mb_strtolower($matches[1]);

                    $this->reference_map[$label] = [
                        '»' => trim($matches[2], '<>'),
                    ];

                    if (isset($matches[3])) {
                        $this->reference_map[$label]['#'] = $matches[3];
                    }

                    continue 2;
                }

                break;
            case '`':
            case '~':

                # fenced code block

                if (preg_match('/^([`]{3,}|[~]{3,})[ ]*(\S+)?[ ]*$/', $deindented_line, $matches)) {
                    $elements [] = $element;

                    $element = [
                        'type' => 'fenced block',
                        'text' => '',
                        'fence' => $matches[1],
                    ];

                    if (isset($matches[2])) {
                        $element['language'] = $matches[2];
                    }

                    continue 2;
                }

                break;
            case '*':
            case '+':
            case '-':
            case '_':

                # hr

                if (preg_match('/^([-*_])([ ]{0,2}\1){2,}[ ]*$/', $deindented_line)) {
                    $elements [] = $element;

                    $element = [
                        'type' => 'hr',
                    ];

                    continue 2;
                }

                # li (unordered); 'ordered' is only set on the first item of a list

                if (preg_match('/^([ ]*)[*+-][ ](.*)/', $line, $matches)) {
                    $elements [] = $element;

                    $element = [
                        'type' => 'li',
                        'ordered' => false,
                        'indentation' => $matches[1],
                        'last' => true,
                        'lines' => [
                            preg_replace('/^[ ]{0,4}/', '', $matches[2]),
                        ],
                    ];

                    continue 2;
                }
        }

        # li (ordered)

        if ($deindented_line[0] <= '9' and $deindented_line[0] >= '0' and preg_match('/^([ ]*)\d+[.][ ](.*)/', $line, $matches)) {
            $elements [] = $element;

            $element = [
                'type' => 'li',
                'ordered' => true,
                'indentation' => $matches[1],
                'last' => true,
                'lines' => [
                    preg_replace('/^[ ]{0,4}/', '', $matches[2]),
                ],
            ];

            continue;
        }

        # paragraph

        if ('paragraph' === $element['type']) {
            if (isset($element['interrupted'])) {
                # A blank line ended the previous paragraph: store it and
                # reuse the array for the new one.
                $elements [] = $element;

                $element['text'] = $line;

                unset($element['interrupted']);
            } else {
                $element['text'] .= "\n" . $line;
            }
        } else {
            $elements [] = $element;

            $element = [
                'type' => 'paragraph',
                'text' => $line,
            ];
        }
    }

    $elements [] = $element;

    # Index 0 always holds the initial placeholder element.
    unset($elements[0]);

    #
    # ~
    #

    $markup = '';

    # Defensive default only: the first item of every list carries 'ordered'
    # and sets the real value before it is read.
    $list_type = 'ul';

    foreach ($elements as $element) {
        switch ($element['type']) {
            case 'paragraph':

                $text = $this->parse_span_elements($element['text']);

                if ('li' === $context and '' === $markup) {
                    # First paragraph of a list item: wrapped in <p> only
                    # when it was followed by a blank line.
                    if (isset($element['interrupted'])) {
                        $markup .= "\n" . '<p>' . $text . '</p>' . "\n";
                    } else {
                        $markup .= $text;
                    }
                } else {
                    $markup .= '<p>' . $text . '</p>' . "\n";
                }

                break;
            case 'blockquote':

                $text = $this->parse_block_elements($element['lines']);

                $markup .= '<blockquote>' . "\n" . $text . '</blockquote>' . "\n";

                break;
            case 'code block':

                # ENT_NOQUOTES is 0, so the former "ENT_QUOTES | ENT_NOQUOTES" was exactly ENT_QUOTES.
                $text = htmlspecialchars($element['text'], ENT_QUOTES, 'UTF-8');

                if (false !== mb_strpos($text, "\x1A\\")) {
                    $text = strtr($text, $this->escape_sequence_map);
                }

                $markup .= isset($element['language']) ? '<pre><code class="language-' . $element['language'] . '">' . $text . '</code></pre>' : '<pre><code>' . $text . '</code></pre>';

                $markup .= "\n";

                break;
            case 'fenced block':

                $text = $element['text'];

                if (false !== mb_strpos($text, "\x1A\\")) {
                    $text = strtr($text, $this->escape_sequence_map);
                }

                # Rendering is delegated to rex_highlight_string(), which is defined outside this class.
                $markup .= rex_highlight_string($text, true) . "\n";

                $markup .= "\n";

                break;
            case 'heading':

                $text = $this->parse_span_elements($element['text']);

                $markup .= '<h' . $element['level'] . '>' . $text . '</h' . $element['level'] . '>' . "\n";

                break;
            case 'hr':

                $markup .= '<hr >' . "\n";

                break;
            case 'li':

                if (isset($element['ordered'])) { # first
                    $list_type = $element['ordered'] ? 'ol' : 'ul';

                    $markup .= '<' . $list_type . '>' . "\n";
                }

                if (isset($element['interrupted']) and !isset($element['last'])) {
                    # Hand the trailing blank line to the nested parse so the
                    # item's content is rendered as a <p> paragraph.
                    $element['lines'] [] = '';
                }

                $text = $this->parse_block_elements($element['lines'], 'li');

                $markup .= '<li>' . $text . '</li>' . "\n";

                if (isset($element['last'])) {
                    $markup .= '</' . $list_type . '>' . "\n";
                }

                break;
            case 'block-level markup':

                $markup .= $element['text'] . "\n";

                break;
            default:

                # e.g. 'self-closing tag': emitted verbatim
                $markup .= $element['text'] . "\n";
        }
    }

    return $markup;
}
||
621 | |||
622 | /** |
||
623 | * @param $text |
||
624 | * @param array $markers |
||
625 | * @return string |
||
626 | */ |
||
627 | private function parse_span_elements($text, $markers = ['![', '&', '*', '<', '[', '_', '`', 'http', '~~']) |
||
884 | } |
||
885 | |||
886 | # |
||
887 | # Read-only |
||
888 | # |
||
889 | |||
890 | private $inline_tags = [ |
||
891 | 'a', |
||
892 | 'abbr', |
||
893 | 'acronym', |
||
894 | 'b', |
||
895 | 'bdo', |
||
896 | 'big', |
||
897 | 'br', |
||
898 | 'button', |
||
899 | 'cite', |
||
900 | 'code', |
||
901 | 'dfn', |
||
902 | 'em', |
||
903 | 'i', |
||
904 | 'img', |
||
905 | 'input', |
||
906 | 'kbd', |
||
907 | 'label', |
||
908 | 'map', |
||
909 | 'object', |
||
910 | 'q', |
||
911 | 'samp', |
||
912 | 'script', |
||
913 | 'select', |
||
914 | 'small', |
||
915 | 'span', |
||
916 | 'strong', |
||
917 | 'sub', |
||
918 | 'sup', |
||
919 | 'textarea', |
||
920 | 'tt', |
||
921 | 'var', |
||
922 | ]; |
||
923 | |||
924 | # ~ |
||
925 | |||
926 | private $strong_regex = [ |
||
927 | '*' => '/^[*]{2}([^*]+?)[*]{2}(?![*])/s', |
||
928 | '_' => '/^__([^_]+?)__(?!_)/s', |
||
929 | ]; |
||
930 | |||
931 | private $em_regex = [ |
||
932 | '*' => '/^[*]([^*]+?)[*](?![*])/s', |
||
933 | '_' => '/^_([^_]+?)[_](?![_])\b/s', |
||
934 | ]; |
||
935 | |||
936 | private $strong_em_regex = [ |
||
937 | '*' => '/^[*]{2}(.*?)[*](.+?)[*](.*?)[*]{2}/s', |
||
938 | '_' => '/^__(.*?)_(.+?)_(.*?)__/s', |
||
939 | ]; |
||
940 | |||
941 | private $em_strong_regex = [ |
||
942 | '*' => '/^[*](.*?)[*]{2}(.+?)[*]{2}(.*?)[*]/s', |
||
943 | '_' => '/^_(.*?)__(.+?)__(.*?)_/s', |
||
944 | ]; |
||
946 |