| Total Complexity | 151 |
| Total Lines | 925 |
| Duplicated Lines | 0 % |
| Changes | 1 | ||
| Bugs | 0 | Features | 0 |
Complex classes like Parsedown often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Parsedown, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 19 | class Parsedown |
||
| 20 | { |
||
| 21 | # |
||
| 22 | # Multiton (http://en.wikipedia.org/wiki/Multiton_pattern) |
||
| 23 | # |
||
| 24 | |||
| 25 | /** |
||
| 26 | * @param string $name |
||
| 27 | * @return mixed|Parsedown |
||
| 28 | */ |
||
| 29 | public static function instance($name = 'default') |
||
| 40 | } |
||
| 41 | |||
| 42 | private static $instances = []; |
||
| 43 | |||
| 44 | # |
||
| 45 | # Setters |
||
| 46 | # |
||
| 47 | |||
| 48 | private $break_marker = " \n"; |
||
| 49 | |||
| 50 | /** |
||
| 51 | * @param $breaks_enabled |
||
| 52 | * @return $this |
||
| 53 | */ |
||
| 54 | public function set_breaks_enabled($breaks_enabled) |
||
| 59 | } |
||
| 60 | |||
| 61 | # |
||
| 62 | # Fields |
||
| 63 | # |
||
| 64 | |||
| 65 | private $reference_map = []; |
||
| 66 | private $escape_sequence_map = []; |
||
| 67 | |||
| 68 | # |
||
| 69 | # Public Methods |
||
| 70 | # |
||
| 71 | |||
| 72 | /** |
||
| 73 | * @param $text |
||
| 74 | * @return mixed|string |
||
| 75 | */ |
||
| 76 | public function parse($text) |
||
| 120 | } |
||
| 121 | |||
| 122 | # |
||
| 123 | # Private Methods |
||
| 124 | # |
||
| 125 | |||
/**
 * Converts a list of Markdown lines into block-level HTML markup.
 *
 * Works in two passes. The first pass walks the lines once and groups them
 * into "elements" (associative arrays keyed by 'type': paragraph, heading,
 * blockquote, li, code block, fenced block, hr, block-level markup,
 * self-closing tag). The second pass renders every element to HTML.
 * Blockquotes and list items keep their raw lines and are rendered by a
 * recursive call to this method. Link reference definitions are not
 * rendered; they are stored in $this->reference_map.
 *
 * @param array  $lines   Markdown source lines (presumably already split on
 *                        "\n" by the caller, without line terminators).
 * @param string $context 'li' while rendering the content of a list item,
 *                        in which case a leading paragraph is emitted
 *                        without <p> tags unless it was followed by a blank
 *                        line. Any other value renders paragraphs normally.
 * @return string The generated HTML.
 */
private function parse_block_elements(array $lines, $context = '')
{
    $elements = [];

    # Placeholder, so that $element['type'] can always be read. It ends up
    # as $elements[0] and is discarded before rendering.
    $element = [
        'type' => '',
    ];

    foreach ($lines as $line) {
        # fenced elements: while one of these is open, every line (blank
        # ones included) belongs to it verbatim

        switch ($element['type']) {
            case 'fenced block':

                if (!isset($element['closed'])) {
                    if (preg_match('/^[ ]*' . $element['fence'][0] . '{3,}[ ]*$/', $line)) {
                        $element['closed'] = true;
                    } else {
                        # An explicit flag (instead of testing for empty
                        # text) keeps blank lines at the top of the block.
                        if (isset($element['has_content'])) {
                            $element['text'] .= "\n";
                        }

                        $element['text'] .= $line;
                        $element['has_content'] = true;
                    }

                    continue 2;
                }

                break;
            case 'block-level markup':

                if (!isset($element['closed'])) {
                    if (false !== mb_strpos($line, $element['start'])) { # opening tag
                        $element['depth']++;
                    }

                    if (false !== mb_strpos($line, $element['end'])) { # closing tag
                        # depth counts nested tags of the same name; the
                        # closing tag seen at depth 0 ends the element
                        if ($element['depth'] > 0) {
                            $element['depth']--;
                        } else {
                            $element['closed'] = true;
                        }
                    }

                    $element['text'] .= "\n" . $line;

                    continue 2;
                }

                break;
        }

        # blank line: only remembered as a flag on the current element

        $deindented_line = ltrim($line);

        if ('' === $deindented_line) {
            $element['interrupted'] = true;

            continue;
        }

        # composite elements

        switch ($element['type']) {
            case 'blockquote':

                if (!isset($element['interrupted'])) {
                    $line = preg_replace('/^[ ]*>[ ]?/', '', $line);

                    $element['lines'][] = $line;

                    continue 2;
                }

                break;
            case 'li':

                if (preg_match('/^([ ]{0,3})(\d+[.]|[*+-])[ ](.*)/', $line, $matches)) {
                    if ($element['indentation'] !== $matches[1]) {
                        # different indentation: keep the marker so that the
                        # recursive call for this item sees a nested list
                        $element['lines'][] = $line;
                    } else {
                        # sibling item: the current one is no longer last
                        unset($element['last']);

                        $elements[] = $element;

                        $element = [
                            'type' => 'li',
                            'indentation' => $matches[1],
                            'last' => true,
                            'lines' => [
                                preg_replace('/^[ ]{0,4}/', '', $matches[3]),
                            ],
                        ];
                    }

                    continue 2;
                }

                if (isset($element['interrupted'])) {
                    # after a blank line only an indented line continues the item
                    if (' ' === $line[0]) {
                        $element['lines'][] = '';

                        $line = preg_replace('/^[ ]{0,4}/', '', $line);

                        $element['lines'][] = $line;

                        unset($element['interrupted']);

                        continue 2;
                    }
                } else {
                    $line = preg_replace('/^[ ]{0,4}/', '', $line);

                    $element['lines'][] = $line;

                    continue 2;
                }

                break;
        }

        # indentation sensitive types

        switch ($line[0]) {
            case ' ':

                # code block (four leading spaces)

                if (isset($line[3]) && ' ' === $line[3] && ' ' === $line[2] && ' ' === $line[1]) {
                    $code_line = mb_substr($line, 4);

                    if ('code block' === $element['type']) {
                        if (isset($element['interrupted'])) {
                            $element['text'] .= "\n";

                            unset($element['interrupted']);
                        }

                        $element['text'] .= "\n" . $code_line;
                    } else {
                        $elements[] = $element;

                        $element = [
                            'type' => 'code block',
                            'text' => $code_line,
                        ];
                    }

                    continue 2;
                }

                break;
            case '#':

                # atx heading (#)

                if (isset($line[1])) {
                    $elements[] = $element;

                    $level = 1;

                    while (isset($line[$level]) && '#' === $line[$level]) {
                        ++$level;
                    }

                    $element = [
                        'type' => 'heading',
                        'text' => trim($line, '# '),
                        'level' => $level,
                    ];

                    continue 2;
                }

                break;
            case '-':
            case '=':

                # setext heading: an underline made only of '-' or '='
                # directly below a paragraph

                if ('paragraph' === $element['type'] && false === isset($element['interrupted'])) {
                    $chopped_line = rtrim($line);

                    $i = 1;

                    while (isset($chopped_line[$i])) {
                        if ($chopped_line[$i] !== $line[0]) {
                            # not an underline: leave the while AND the switch
                            break 2;
                        }

                        ++$i;
                    }

                    $element['type'] = 'heading';
                    $element['level'] = '-' === $line[0] ? 2 : 1;

                    continue 2;
                }

                break;
        }

        # indentation insensitive types

        switch ($deindented_line[0]) {
            case '<':

                $position = mb_strpos($deindented_line, '>');

                if ($position > 1) { # tag
                    $name = mb_substr($deindented_line, 1, $position - 1);
                    $name = rtrim($name);

                    # Recomputed for every tag. A flag that is only set (and
                    # unset on one path) would survive the early breaks below
                    # and mark a later, unrelated tag as self-closing.
                    $self_closing = '/' === mb_substr($name, -1);

                    if ($self_closing) {
                        $name = mb_substr($name, 0, -1);
                    }

                    # drop attributes, keep the bare tag name
                    $position = mb_strpos($name, ' ');

                    if ($position) {
                        $name = mb_substr($name, 0, $position);
                    }

                    if (!ctype_alpha($name)) {
                        break;
                    }

                    # inline tags stay part of the surrounding paragraph
                    if (in_array($name, $this->inline_tags, true)) {
                        break;
                    }

                    $elements[] = $element;

                    if ($self_closing) {
                        $element = [
                            'type' => 'self-closing tag',
                            'text' => $deindented_line,
                        ];

                        continue 2;
                    }

                    $element = [
                        'type' => 'block-level markup',
                        'text' => $deindented_line,
                        'start' => '<' . $name . '>',
                        'end' => '</' . $name . '>',
                        'depth' => 0,
                    ];

                    # The line starts with the opening tag, so the closing
                    # tag can never sit at offset 0; a truthy test suffices.
                    if (mb_strpos($deindented_line, $element['end'])) {
                        $element['closed'] = true;
                    }

                    continue 2;
                }

                break;
            case '>':

                # quote

                if (preg_match('/^>[ ]?(.*)/', $deindented_line, $matches)) {
                    $elements[] = $element;

                    $element = [
                        'type' => 'blockquote',
                        'lines' => [
                            $matches[1],
                        ],
                    ];

                    continue 2;
                }

                break;
            case '[':

                # reference: stored for later lookup, produces no element

                if (preg_match('/^\[(.+?)\]:[ ]*(.+?)(?:[ ]+[\'"](.+?)[\'"])?[ ]*$/', $deindented_line, $matches)) {
                    $label = mb_strtolower($matches[1]);

                    $this->reference_map[$label] = [
                        '»' => trim($matches[2], '<>'),
                    ];

                    if (isset($matches[3])) {
                        $this->reference_map[$label]['#'] = $matches[3];
                    }

                    continue 2;
                }

                break;
            case '`':
            case '~':

                # fenced code block

                if (preg_match('/^([`]{3,}|[~]{3,})[ ]*(\S+)?[ ]*$/', $deindented_line, $matches)) {
                    $elements[] = $element;

                    $element = [
                        'type' => 'fenced block',
                        'text' => '',
                        'fence' => $matches[1],
                    ];

                    if (isset($matches[2])) {
                        $element['language'] = $matches[2];
                    }

                    continue 2;
                }

                break;
            case '*':
            case '+':
            case '-':
            case '_':

                # hr

                if (preg_match('/^([-*_])([ ]{0,2}\1){2,}[ ]*$/', $deindented_line)) {
                    $elements[] = $element;

                    $element = [
                        'type' => 'hr',
                    ];

                    continue 2;
                }

                # li (unordered); 'ordered' is only present on the first
                # item of a list and tells the renderer to open the list

                if (preg_match('/^([ ]*)[*+-][ ](.*)/', $line, $matches)) {
                    $elements[] = $element;

                    $element = [
                        'type' => 'li',
                        'ordered' => false,
                        'indentation' => $matches[1],
                        'last' => true,
                        'lines' => [
                            preg_replace('/^[ ]{0,4}/', '', $matches[2]),
                        ],
                    ];

                    continue 2;
                }
        }

        # li (ordered)

        if ($deindented_line[0] <= '9' && $deindented_line[0] >= '0' && preg_match('/^([ ]*)\d+[.][ ](.*)/', $line, $matches)) {
            $elements[] = $element;

            $element = [
                'type' => 'li',
                'ordered' => true,
                'indentation' => $matches[1],
                'last' => true,
                'lines' => [
                    preg_replace('/^[ ]{0,4}/', '', $matches[2]),
                ],
            ];

            continue;
        }

        # paragraph

        if ('paragraph' === $element['type']) {
            if (isset($element['interrupted'])) {
                # A blank line ended the previous paragraph. The stored copy
                # keeps its 'interrupted' flag, which the renderer reads.
                $elements[] = $element;

                $element['text'] = $line;

                unset($element['interrupted']);
            } else {
                $element['text'] .= "\n" . $line;
            }
        } else {
            $elements[] = $element;

            $element = [
                'type' => 'paragraph',
                'text' => $line,
            ];
        }
    }

    $elements[] = $element;

    # drop the placeholder created before the loop
    unset($elements[0]);

    #
    # ~
    #

    $markup = '';

    foreach ($elements as $element) {
        switch ($element['type']) {
            case 'paragraph':

                $text = $this->parse_span_elements($element['text']);

                if ('li' === $context && '' === $markup) {
                    # first paragraph of a list item: wrapped in <p> only
                    # when a blank line followed it
                    if (isset($element['interrupted'])) {
                        $markup .= "\n" . '<p>' . $text . '</p>' . "\n";
                    } else {
                        $markup .= $text;
                    }
                } else {
                    $markup .= '<p>' . $text . '</p>' . "\n";
                }

                break;
            case 'blockquote':

                $text = $this->parse_block_elements($element['lines']);

                $markup .= '<blockquote>' . "\n" . $text . '</blockquote>' . "\n";

                break;
            case 'code block':

                # ENT_SUBSTITUTE: without it htmlspecialchars() returns an
                # empty string when the text contains invalid UTF-8.
                $text = htmlspecialchars($element['text'], ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');

                if (false !== mb_strpos($text, "\x1A\\")) {
                    $text = strtr($text, $this->escape_sequence_map);
                }

                # Within this method only fenced blocks record a language,
                # so this branch is defensive; the value is escaped because
                # it would end up inside an attribute.
                if (isset($element['language'])) {
                    $language = htmlspecialchars($element['language'], ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');

                    $markup .= '<pre><code class="language-' . $language . '">' . $text . '</code></pre>';
                } else {
                    $markup .= '<pre><code>' . $text . '</code></pre>';
                }

                $markup .= "\n";

                break;
            case 'fenced block':

                $text = $element['text'];

                if (false !== mb_strpos($text, "\x1A\\")) {
                    $text = strtr($text, $this->escape_sequence_map);
                }

                # NOTE(review): rex_highlight_string() is not defined in this
                # file; presumably it returns escaped, highlighted HTML -
                # confirm, since the text is passed to it unescaped.
                $markup .= rex_highlight_string($text, true) . "\n";

                $markup .= "\n";

                break;
            case 'heading':

                $text = $this->parse_span_elements($element['text']);

                $markup .= '<h' . $element['level'] . '>' . $text . '</h' . $element['level'] . '>' . "\n";

                break;
            case 'hr':

                $markup .= '<hr >' . "\n";

                break;
            case 'li':

                if (isset($element['ordered'])) { # first
                    $list_type = $element['ordered'] ? 'ol' : 'ul';

                    $markup .= '<' . $list_type . '>' . "\n";
                }

                # a trailing empty line marks the last paragraph of a
                # non-final item as interrupted in the recursive call
                if (isset($element['interrupted']) && !isset($element['last'])) {
                    $element['lines'][] = '';
                }

                $text = $this->parse_block_elements($element['lines'], 'li');

                $markup .= '<li>' . $text . '</li>' . "\n";

                # $list_type was set by the first item of the same list
                if (isset($element['last'])) {
                    $markup .= '</' . $list_type . '>' . "\n";
                }

                break;
            case 'block-level markup':

                $markup .= $element['text'] . "\n";

                break;
            default:

                # e.g. 'self-closing tag': emitted verbatim
                $markup .= $element['text'] . "\n";
        }
    }

    return $markup;
}
||
| 621 | |||
| 622 | /** |
||
| 623 | * @param $text |
||
| 624 | * @param array $markers |
||
| 625 | * @return string |
||
| 626 | */ |
||
| 627 | private function parse_span_elements($text, $markers = ['![', '&', '*', '<', '[', '_', '`', 'http', '~~']) |
||
| 884 | } |
||
| 885 | |||
| 886 | # |
||
| 887 | # Read-only |
||
| 888 | # |
||
| 889 | |||
| 890 | private $inline_tags = [ |
||
| 891 | 'a', |
||
| 892 | 'abbr', |
||
| 893 | 'acronym', |
||
| 894 | 'b', |
||
| 895 | 'bdo', |
||
| 896 | 'big', |
||
| 897 | 'br', |
||
| 898 | 'button', |
||
| 899 | 'cite', |
||
| 900 | 'code', |
||
| 901 | 'dfn', |
||
| 902 | 'em', |
||
| 903 | 'i', |
||
| 904 | 'img', |
||
| 905 | 'input', |
||
| 906 | 'kbd', |
||
| 907 | 'label', |
||
| 908 | 'map', |
||
| 909 | 'object', |
||
| 910 | 'q', |
||
| 911 | 'samp', |
||
| 912 | 'script', |
||
| 913 | 'select', |
||
| 914 | 'small', |
||
| 915 | 'span', |
||
| 916 | 'strong', |
||
| 917 | 'sub', |
||
| 918 | 'sup', |
||
| 919 | 'textarea', |
||
| 920 | 'tt', |
||
| 921 | 'var', |
||
| 922 | ]; |
||
| 923 | |||
| 924 | # ~ |
||
| 925 | |||
| 926 | private $strong_regex = [ |
||
| 927 | '*' => '/^[*]{2}([^*]+?)[*]{2}(?![*])/s', |
||
| 928 | '_' => '/^__([^_]+?)__(?!_)/s', |
||
| 929 | ]; |
||
| 930 | |||
| 931 | private $em_regex = [ |
||
| 932 | '*' => '/^[*]([^*]+?)[*](?![*])/s', |
||
| 933 | '_' => '/^_([^_]+?)[_](?![_])\b/s', |
||
| 934 | ]; |
||
| 935 | |||
| 936 | private $strong_em_regex = [ |
||
| 937 | '*' => '/^[*]{2}(.*?)[*](.+?)[*](.*?)[*]{2}/s', |
||
| 938 | '_' => '/^__(.*?)_(.+?)_(.*?)__/s', |
||
| 939 | ]; |
||
| 940 | |||
| 941 | private $em_strong_regex = [ |
||
| 942 | '*' => '/^[*](.*?)[*]{2}(.+?)[*]{2}(.*?)[*]/s', |
||
| 943 | '_' => '/^_(.*?)__(.+?)__(.*?)_/s', |
||
| 944 | ]; |
||
| 946 |