Total Complexity | 151 |
Total Lines | 948 |
Duplicated Lines | 0 % |
Changes | 0 |
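Complex classes like Parsedown often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields and methods that share the same prefix or suffix; in the listing below, for example, the fields `$strong_regex`, `$em_regex`, `$strong_em_regex` and `$em_strong_regex` all end in `_regex`.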
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Parsedown, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
19 | class Parsedown |
||
|
|||
20 | { |
||
21 | # |
||
22 | # Multiton (http://en.wikipedia.org/wiki/Multiton_pattern) |
||
23 | # |
||
24 | |||
25 | /** |
||
26 | * @param string $name |
||
27 | * @return mixed|Parsedown |
||
28 | */ |
||
29 | public static function instance($name = 'default') |
||
40 | } |
||
41 | |||
42 | private static $instances = array(); |
||
43 | |||
44 | # |
||
45 | # Setters |
||
46 | # |
||
47 | |||
48 | private $break_marker = " \n"; |
||
49 | |||
/**
 * Sets the break marker stored on this instance.
 *
 * NOTE(review): the two literals below are copied exactly as they appear in
 * this listing; the listing looks whitespace-collapsed, so confirm the number
 * of spaces before "\n" against the real file.
 *
 * @param mixed $breaks_enabled truthy selects "\n"; falsy selects " \n"
 * @return $this the same instance, so calls can be chained
 */
public function set_breaks_enabled($breaks_enabled)
{
    if ($breaks_enabled) {
        $this->break_marker = "\n";
    } else {
        $this->break_marker = " \n";
    }

    return $this;
}
||
60 | |||
61 | # |
||
62 | # Fields |
||
63 | # |
||
64 | |||
65 | private $reference_map = array(); |
||
66 | private $escape_sequence_map = array(); |
||
67 | |||
68 | # |
||
69 | # Public Methods |
||
70 | # |
||
71 | |||
/**
 * Converts Markdown text to markup.
 *
 * Steps: normalise line endings, replace tabs, swap backslash escape
 * sequences for placeholder codes, hand the lines to parse_block_elements(),
 * then swap the placeholder codes back for the escaped character.
 *
 * Reference definitions and escape codes collected while parsing are
 * per-document state, so both maps are cleared on entry; otherwise a
 * definition found in one document would still resolve in the next document
 * parsed by the same instance.
 *
 * @param string $text Markdown source
 * @return mixed|string the generated markup, without trailing newlines
 */
public function parse($text)
{
    # starts every document with empty per-document state
    $this->reference_map = array();
    $this->escape_sequence_map = array();

    # removes \r characters ("\r\n" is handled before the lone "\r")
    $text = str_replace(array("\r\n", "\r"), "\n", $text);

    # replaces tabs with spaces
    # NOTE(review): the replacement literal is copied exactly as shown in this
    # listing, which looks whitespace-collapsed; confirm the intended width.
    $text = str_replace("\t", ' ', $text);

    # encodes escape sequences

    if (false !== strpos($text, '\\')) {
        $escape_sequences = array('\\\\', '\`', '\*', '\_', '\{', '\}', '\[', '\]', '\(', '\)', '\>', '\#', '\+', '\-', '\.', '\!');

        foreach ($escape_sequences as $index => $escape_sequence) {
            if (false === strpos($text, $escape_sequence)) {
                continue;
            }

            # placeholder: SUB character, backslash, list index, semicolon
            $code = "\x1A" . '\\' . $index . ';';

            $text = str_replace($escape_sequence, $code, $text);

            $this->escape_sequence_map[$code] = $escape_sequence;
        }
    }

    # ~

    $text = trim($text, "\n");

    $lines = explode("\n", $text);

    $text = $this->parse_block_elements($lines);

    # decodes escape sequences: index 1 is the escaped character itself,
    # i.e. the sequence without its leading backslash

    foreach ($this->escape_sequence_map as $code => $escape_sequence) {
        $text = str_replace($code, $escape_sequence[1], $text);
    }

    # ~

    $text = rtrim($text, "\n");

    return $text;
}
||
121 | |||
122 | # |
||
123 | # Private Methods |
||
124 | # |
||
125 | |||
/**
 * Groups lines into block elements and renders those elements as markup.
 *
 * The first loop walks the lines once, keeping the element currently being
 * built in $element and pushing it onto $elements whenever a new element
 * starts. The second loop turns each collected element into markup;
 * blockquotes and list items are rendered by calling this method again on
 * their own lines.
 *
 * Reference definitions ("[label]: url 'title'") produce no element; they are
 * stored in $this->reference_map.
 *
 * @param array  $lines   lines to parse (no trailing newline characters)
 * @param string $context 'li' when the lines are the content of a list item;
 *                        the first paragraph is then emitted without <p> tags
 *                        unless it was followed by a blank line
 * @return string
 */
private function parse_block_elements(array $lines, $context = '')
{
    $elements = array();

    # placeholder with an empty type; it is pushed before the first real
    # element and removed again before rendering
    $element = array(
        'type' => ''
    );

    foreach ($lines as $line) {
        # fenced elements: while one of these is open, lines are appended
        # verbatim and no other rule is consulted

        switch ($element['type']) {
            case 'fenced block':

                if (!isset($element['closed'])) {
                    # the closing fence uses the same character as the opening one
                    $fence_pattern = '/^[ ]*' . preg_quote($element['fence'][0], '/') . '{3,}[ ]*$/';

                    if (preg_match($fence_pattern, $line)) {
                        $element['closed'] = true;
                    } else {
                        if ('' !== $element['text']) {
                            $element['text'] .= "\n";
                        }

                        $element['text'] .= $line;
                    }

                    continue 2;
                }

                break;

            case 'block-level markup':

                if (!isset($element['closed'])) {
                    if (false !== strpos($line, $element['start'])) { # opening tag
                        $element['depth']++;
                    }

                    if (false !== strpos($line, $element['end'])) { # closing tag
                        if ($element['depth'] > 0) {
                            $element['depth']--;
                        } else {
                            $element['closed'] = true;
                        }
                    }

                    $element['text'] .= "\n" . $line;

                    continue 2;
                }

                break;
        }

        # *

        $deindented_line = ltrim($line);

        if ('' === $deindented_line) {
            # a blank line only marks the current element; later rules decide
            # whether that ends it
            $element['interrupted'] = true;

            continue;
        }

        # composite elements

        switch ($element['type']) {
            case 'blockquote':

                if (!isset($element['interrupted'])) {
                    $line = preg_replace('/^[ ]*>[ ]?/', '', $line);

                    $element['lines'][] = $line;

                    continue 2;
                }

                break;

            case 'li':

                if (preg_match('/^([ ]{0,3})(\d+[.]|[*+-])[ ](.*)/', $line, $matches)) {
                    if ($element['indentation'] !== $matches[1]) {
                        # different indentation: the marker belongs to a nested list
                        $element['lines'][] = $line;
                    } else {
                        # same indentation: the current item is no longer the last one
                        unset($element['last']);

                        $elements[] = $element;

                        $element = array(
                            'type' => 'li',
                            'indentation' => $matches[1],
                            'last' => true,
                            'lines' => array(
                                preg_replace('/^[ ]{0,4}/', '', $matches[3])
                            )
                        );
                    }

                    continue 2;
                }

                if (isset($element['interrupted'])) {
                    if (' ' === $line[0]) {
                        # indented line after a blank line continues the item
                        $element['lines'][] = '';

                        $line = preg_replace('/^[ ]{0,4}/', '', $line);

                        $element['lines'][] = $line;

                        unset($element['interrupted']);

                        continue 2;
                    }
                } else {
                    $line = preg_replace('/^[ ]{0,4}/', '', $line);

                    $element['lines'][] = $line;

                    continue 2;
                }

                break;
        }

        # indentation sensitive types

        switch ($line[0]) {
            case ' ':

                # code block

                if (isset($line[3]) and ' ' === $line[3] and ' ' === $line[2] and ' ' === $line[1]) {
                    $code_line = substr($line, 4);

                    if ('code block' === $element['type']) {
                        if (isset($element['interrupted'])) {
                            $element['text'] .= "\n";

                            unset($element['interrupted']);
                        }

                        $element['text'] .= "\n" . $code_line;
                    } else {
                        $elements[] = $element;

                        $element = array(
                            'type' => 'code block',
                            'text' => $code_line
                        );
                    }

                    continue 2;
                }

                break;

            case '#':

                # atx heading (#)

                if (isset($line[1])) {
                    $elements[] = $element;

                    # the level is the number of leading '#' characters
                    $level = 1;

                    while (isset($line[$level]) and '#' === $line[$level]) {
                        ++$level;
                    }

                    $element = array(
                        'type' => 'heading',
                        'text' => trim($line, '# '),
                        'level' => $level
                    );

                    continue 2;
                }

                break;

            case '-':
            case '=':

                # setext heading

                if ('paragraph' === $element['type'] and false === isset($element['interrupted'])) {
                    $chopped_line = rtrim($line);

                    $i = 1;

                    while (isset($chopped_line[$i])) {
                        if ($chopped_line[$i] !== $line[0]) {
                            # not a pure underline: leaves both the while and the switch
                            break 2;
                        }

                        ++$i;
                    }

                    $element['type'] = 'heading';
                    $element['level'] = '-' === $line[0] ? 2 : 1;

                    continue 2;
                }

                break;
        }

        # indentation insensitive types

        switch ($deindented_line[0]) {
            case '<':

                $position = strpos($deindented_line, '>');

                if (false !== $position and $position > 1) { # tag
                    $name = substr($deindented_line, 1, $position - 1);
                    $name = rtrim($name);

                    # reset for every line; a flag left over from an earlier
                    # line (e.g. an inline "<br />") must not affect this one
                    $is_self_closing = false;

                    if ('/' === substr($name, -1)) {
                        $is_self_closing = true;

                        $name = substr($name, 0, -1);
                    }

                    # drops attributes, keeping only the tag name
                    $position = strpos($name, ' ');

                    if ($position) {
                        $name = substr($name, 0, $position);
                    }

                    if (!ctype_alpha($name)) {
                        break;
                    }

                    if (in_array($name, $this->inline_tags, true)) {
                        break;
                    }

                    $elements[] = $element;

                    if ($is_self_closing) {
                        $element = array(
                            'type' => 'self-closing tag',
                            'text' => $deindented_line
                        );

                        continue 2;
                    }

                    $element = array(
                        'type' => 'block-level markup',
                        'text' => $deindented_line,
                        'start' => '<' . $name . '>',
                        'end' => '</' . $name . '>',
                        'depth' => 0
                    );

                    # the line starts with the opening tag, so a closing tag can
                    # never be found at offset 0
                    if (false !== strpos($deindented_line, $element['end'])) {
                        $element['closed'] = true;
                    }

                    continue 2;
                }

                break;

            case '>':

                # quote

                if (preg_match('/^>[ ]?(.*)/', $deindented_line, $matches)) {
                    $elements[] = $element;

                    $element = array(
                        'type' => 'blockquote',
                        'lines' => array(
                            $matches[1]
                        )
                    );

                    continue 2;
                }

                break;

            case '[':

                # reference

                if (preg_match('/^\[(.+?)\]:[ ]*(.+?)(?:[ ]+[\'"](.+?)[\'"])?[ ]*$/', $deindented_line, $matches)) {
                    $label = strtolower($matches[1]);

                    # '»' holds the target with surrounding angle brackets removed,
                    # '#' holds the optional quoted title
                    $this->reference_map[$label] = array(
                        '»' => trim($matches[2], '<>')
                    );

                    if (isset($matches[3])) {
                        $this->reference_map[$label]['#'] = $matches[3];
                    }

                    continue 2;
                }

                break;

            case '`':
            case '~':

                # fenced code block

                if (preg_match('/^([`]{3,}|[~]{3,})[ ]*(\S+)?[ ]*$/', $deindented_line, $matches)) {
                    $elements[] = $element;

                    $element = array(
                        'type' => 'fenced block',
                        'text' => '',
                        'fence' => $matches[1]
                    );

                    if (isset($matches[2])) {
                        $element['language'] = $matches[2];
                    }

                    continue 2;
                }

                break;

            case '*':
            case '+':
            case '-':
            case '_':

                # hr

                if (preg_match('/^([-*_])([ ]{0,2}\1){2,}[ ]*$/', $deindented_line)) {
                    $elements[] = $element;

                    $element = array(
                        'type' => 'hr'
                    );

                    continue 2;
                }

                # li

                if (preg_match('/^([ ]*)[*+-][ ](.*)/', $line, $matches)) {
                    $elements[] = $element;

                    # 'ordered' is only present on the first item of a list
                    $element = array(
                        'type' => 'li',
                        'ordered' => false,
                        'indentation' => $matches[1],
                        'last' => true,
                        'lines' => array(
                            preg_replace('/^[ ]{0,4}/', '', $matches[2])
                        )
                    );

                    continue 2;
                }
        }

        # li

        if ($deindented_line[0] <= '9' and $deindented_line[0] >= '0' and preg_match('/^([ ]*)\d+[.][ ](.*)/', $line, $matches)) {
            $elements[] = $element;

            $element = array(
                'type' => 'li',
                'ordered' => true,
                'indentation' => $matches[1],
                'last' => true,
                'lines' => array(
                    preg_replace('/^[ ]{0,4}/', '', $matches[2])
                )
            );

            continue;
        }

        # paragraph

        if ('paragraph' === $element['type']) {
            if (isset($element['interrupted'])) {
                # a blank line separated the two: stores the finished paragraph
                # and reuses the array for the new one
                $elements[] = $element;

                $element['text'] = $line;

                unset($element['interrupted']);
            } else {
                $element['text'] .= "\n" . $line;
            }
        } else {
            $elements[] = $element;

            $element = array(
                'type' => 'paragraph',
                'text' => $line
            );
        }
    }

    $elements[] = $element;

    # removes the empty-typed placeholder, which is always the first entry
    unset($elements[0]);

    #
    # ~
    #

    $markup = '';

    foreach ($elements as $element) {
        switch ($element['type']) {
            case 'paragraph':

                $text = $this->parse_span_elements($element['text']);

                if ('li' === $context and '' === $markup) {
                    if (isset($element['interrupted'])) {
                        $markup .= "\n" . '<p>' . $text . '</p>' . "\n";
                    } else {
                        $markup .= $text;
                    }
                } else {
                    $markup .= '<p>' . $text . '</p>' . "\n";
                }

                break;

            case 'blockquote':

                $text = $this->parse_block_elements($element['lines']);

                $markup .= '<blockquote>' . "\n" . $text . '</blockquote>' . "\n";

                break;

            case 'code block':

                $text = htmlspecialchars($element['text'], ENT_NOQUOTES, 'UTF-8');

                # restores the original escape sequences (backslash included)
                if (false !== strpos($text, "\x1A\\")) {
                    $text = strtr($text, $this->escape_sequence_map);
                }

                $markup .= isset($element['language']) ? '<pre><code class="language-' . $element['language'] . '">' . $text . '</code></pre>' : '<pre><code>' . $text . '</code></pre>';

                $markup .= "\n";

                break;

            case 'fenced block':

                $text = $element['text'];

                if (false !== strpos($text, "\x1A\\")) {
                    $text = strtr($text, $this->escape_sequence_map);
                }

                # NOTE(review): rex_highlight_string() is not defined in this
                # listing; presumably it returns the highlighted markup - confirm
                $markup .= rex_highlight_string($text, true) . "\n";

                $markup .= "\n";

                break;

            case 'heading':

                $text = $this->parse_span_elements($element['text']);

                $markup .= '<h' . $element['level'] . '>' . $text . '</h' . $element['level'] . '>' . "\n";

                break;

            case 'hr':

                $markup .= '<hr >' . "\n";

                break;

            case 'li':

                if (isset($element['ordered'])) { # first
                    # kept across iterations so the last item can close the list
                    $list_type = $element['ordered'] ? 'ol' : 'ul';

                    $markup .= '<' . $list_type . '>' . "\n";
                }

                if (isset($element['interrupted']) and !isset($element['last'])) {
                    $element['lines'][] = '';
                }

                $text = $this->parse_block_elements($element['lines'], 'li');

                $markup .= '<li>' . $text . '</li>' . "\n";

                if (isset($element['last'])) {
                    $markup .= '</' . $list_type . '>' . "\n";
                }

                break;

            case 'block-level markup':

                $markup .= $element['text'] . "\n";

                break;

            default:

                $markup .= $element['text'] . "\n";
        }
    }

    return $markup;
}
||
637 | |||
638 | /** |
||
639 | * @param $text |
||
640 | * @param array $markers |
||
641 | * @return string |
||
642 | */ |
||
643 | private function parse_span_elements($text, $markers = array('![', '&', '*', '<', '[', '_', '`', 'http', '~~')) |
||
907 | } |
||
908 | |||
909 | # |
||
910 | # Read-only |
||
911 | # |
||
912 | |||
913 | private $inline_tags = array( |
||
914 | 'a', |
||
915 | 'abbr', |
||
916 | 'acronym', |
||
917 | 'b', |
||
918 | 'bdo', |
||
919 | 'big', |
||
920 | 'br', |
||
921 | 'button', |
||
922 | 'cite', |
||
923 | 'code', |
||
924 | 'dfn', |
||
925 | 'em', |
||
926 | 'i', |
||
927 | 'img', |
||
928 | 'input', |
||
929 | 'kbd', |
||
930 | 'label', |
||
931 | 'map', |
||
932 | 'object', |
||
933 | 'q', |
||
934 | 'samp', |
||
935 | 'script', |
||
936 | 'select', |
||
937 | 'small', |
||
938 | 'span', |
||
939 | 'strong', |
||
940 | 'sub', |
||
941 | 'sup', |
||
942 | 'textarea', |
||
943 | 'tt', |
||
944 | 'var' |
||
945 | ); |
||
946 | |||
947 | # ~ |
||
948 | |||
949 | private $strong_regex = array( |
||
950 | '*' => '/^[*]{2}([^*]+?)[*]{2}(?![*])/s', |
||
951 | '_' => '/^__([^_]+?)__(?!_)/s' |
||
952 | ); |
||
953 | |||
954 | private $em_regex = array( |
||
955 | '*' => '/^[*]([^*]+?)[*](?![*])/s', |
||
956 | '_' => '/^_([^_]+?)[_](?![_])\b/s' |
||
957 | ); |
||
958 | |||
959 | private $strong_em_regex = array( |
||
960 | '*' => '/^[*]{2}(.*?)[*](.+?)[*](.*?)[*]{2}/s', |
||
961 | '_' => '/^__(.*?)_(.+?)_(.*?)__/s' |
||
962 | ); |
||
963 | |||
964 | private $em_strong_regex = array( |
||
965 | '*' => '/^[*](.*?)[*]{2}(.+?)[*]{2}(.*?)[*]/s', |
||
966 | '_' => '/^_(.*?)__(.+?)__(.*?)_/s' |
||
967 | ); |
||
969 |
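You can fix this by declaring a namespace for your class, for example by placing `namespace YourVendor\Markdown;` directly after the opening `<?php` tag and before the `class Parsedown` declaration.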
When choosing a vendor namespace, try to pick something that is not too generic to avoid conflicts with other libraries.