| Total Complexity | 91 |
| Total Lines | 403 |
| Duplicated Lines | 0 % |
| Changes | 0 | ||
Complex classes like HtmlUp often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common way to find such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use HtmlUp, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 14 | class HtmlUp |
||
| 15 | { |
||
| 16 | use HtmlHelper; |
||
| 17 | |||
// Line-level patterns used by the block parsers of this class.

/** Blockquote marker: one or more `>` followed by whitespace (used by quote()). */
const RE_MD_QUOTE = '~^\s*(>+)\s+~';

/** Line that starts with an opening or closing HTML tag (used by raw()). */
const RE_RAW = '/^<\/?\w.*?\/?>/';

/** Setext underline made of three or more `=` or `-` (used by setext()). */
const RE_MD_SETEXT = '~^\s*(={3,}|-{3,})\s*$~';

/** Code fence with an optional language tag; presumably consumed by code() — body not visible here. */
const RE_MD_CODE = '/^```\s*([\w-]+)?/';

/** Horizontal rule made of three or more `_`, `*` or `-` (used by rule()). */
const RE_MD_RULE = '~^(_{3,}|\*{3,}|\-{3,})$~';

/** One column of a table delimiter row (used by tableInternal()). */
const RE_MD_TCOL = '~(\|\s*\:)?\s*\-{3,}\s*(\:\s*\|)?~';

/** Ordered list marker: digits, a dot and a space (used by listt()/listInternal()). */
const RE_MD_OL = '/^\d+\. /';

/** @var string[] Markdown split into lines, padded with an empty first and last line by scan(). */
protected $lines = [];

/** @var string[] Closing markup for the lists (and nested items) that are currently open. */
protected $stackList = [];

/** @var string[] Closing markup for the blockquotes that are currently open. */
protected $stackBlock = [];

/** @var string[] Closing markup for a table whose last row has been emitted. */
protected $stackTable = [];

/** @var int Index into $lines of the line being processed. */
protected $pointer = -1;

/** @var int Number of list wrappers opened so far. */
protected $listLevel = 0;

/** @var int Number of blockquote wrappers opened so far. */
protected $quoteLevel = 0;

/** @var int Leading whitespace width of the current line. */
protected $indent = 0;

/** @var int Leading whitespace width of the next line. */
protected $nextIndent = 0;

/** @var int Width of one indentation step (2 or 4, see scan()). */
protected $indentLen = 4;

/**
 * @var string One indentation step as literal spaces.
 *
 * Must be $indentLen spaces wide: tabs are expanded to it and indented
 * code is detected with it, so the default is four spaces.
 */
protected $indentStr = '    ';

/** @var string Current line. */
protected $line = '';

/** @var string Current line with surrounding whitespace removed. */
protected $trimmedLine = '';

/** @var string Previous line. */
protected $prevLine = '';

/** @var string Previous line with surrounding whitespace removed. */
protected $trimmedPrevLine = '';

/** @var string Next line, or '' when there is none. */
protected $nextLine = '';

/** @var string Next line with surrounding whitespace removed. */
protected $trimmedNextLine = '';

/** @var string HTML produced so far by the block parsers. */
protected $markup = '';

/** @var bool Whether a list is currently open. */
protected $inList = \false;

/** @var bool Whether a blockquote is currently open. */
protected $inQuote = \false;

/** @var bool Presumably whether a paragraph is open; its users are not visible here. */
protected $inPara = \false;

/** @var bool Whether a raw HTML block is being passed through. */
protected $inHtml = \false;

/** @var bool Whether table rows are currently being emitted. */
protected $inTable = \false;
||
| 52 | |||
/**
 * Create a parser, optionally primed with markdown to convert.
 *
 * The arguments are handed straight to scan(); nothing is parsed until
 * parse() or __toString() is called.
 *
 * @param string|null $markdown    Markdown source; may be omitted and supplied to parse() later.
 * @param int         $indentWidth Indentation width; scan() treats 2 as two spaces and anything else as 4.
 */
public function __construct($markdown = \null, $indentWidth = 4)
{
    $this->scan($markdown, $indentWidth);
}
||
| 63 | |||
/**
 * Normalise the markdown source and split it into the $lines buffer.
 *
 * Blank or missing input leaves the parser state untouched. Otherwise the
 * indentation settings are stored, tabs are expanded to one indentation
 * step, line endings are unified to "\n", and the resulting lines are
 * padded with an empty line at both ends.
 *
 * @param string|null $markdown    Markdown source.
 * @param int         $indentWidth 2 selects two-space indentation; any other value selects 4.
 *
 * @return void
 */
protected function scan($markdown, $indentWidth = 4)
{
    // The constructor passes null by default; trim(null) is deprecated since PHP 8.1.
    $markdown = (string) $markdown;

    if ('' === \trim($markdown)) {
        return;
    }

    $this->indentLen = $indentWidth == 2 ? 2 : 4;
    // Derive the indent string from the length so the two can never disagree.
    $this->indentStr = \str_repeat(' ', $this->indentLen);

    // Normalize whitespaces
    $markdown = \str_replace("\t", $this->indentStr, $markdown);
    $markdown = \str_replace(["\r\n", "\r"], "\n", $markdown);

    $this->lines = \array_merge([''], \explode("\n", $markdown), ['']);
}
||
| 79 | |||
/**
 * Render the markdown given to the constructor when the object is used as a string.
 *
 * @return string Whatever parse() returns for the already scanned lines.
 */
public function __toString()
{
    $html = $this->parse();

    return $html;
}
||
| 84 | |||
/**
 * Convert markdown to HTML.
 *
 * When markdown is passed in, the whole parser state is reset and the new
 * text is scanned first; otherwise the lines scanned earlier are used.
 * Block elements are parsed here and the resulting markup is then handed
 * to SpanElementParser.
 *
 * @param string|null $markdown    Markdown source, or null to reuse the scanned lines.
 * @param int         $indentWidth Indentation width forwarded to scan().
 *
 * @return string The generated markup, or '' when there are no lines to parse.
 */
public function parse($markdown = \null, $indentWidth = 4)
{
    $hasNewInput = $markdown !== \null;

    if ($hasNewInput) {
        $this->reset(\true);
        $this->scan($markdown, $indentWidth);
    }

    if (!$this->lines) {
        return '';
    }

    $this->parseBlockElements();

    $spanParser = new SpanElementParser;

    return $spanParser->parse($this->markup);
}
||
| 109 | |||
| 110 | protected function parseBlockElements() |
||
| 128 | } |
||
| 129 | } |
||
| 130 | |||
/**
 * Try each block-level parser on the current line, in order, until one accepts it.
 *
 * The order (atx, setext, code, rule, listt) matters: the first parser
 * that returns a truthy value wins and the remaining ones are not run.
 *
 * @return bool True when one of the parsers handled the line.
 */
protected function isBlock()
{
    foreach (['atx', 'setext', 'code', 'rule', 'listt'] as $parser) {
        if ($this->{$parser}()) {
            return \true;
        }
    }

    return \false;
}
||
| 135 | |||
/**
 * Load the line at the current pointer and refresh the previous/next line state.
 *
 * The old current line becomes the previous line; the raw text, trimmed
 * text and leading-whitespace width are then recorded for the current
 * line and for the line after it ('' when there is none).
 *
 * @return void
 */
protected function init()
{
    // Remember the outgoing line before it is overwritten.
    $this->prevLine        = $this->line;
    $this->trimmedPrevLine = $this->trimmedLine;

    $current       = $this->lines[$this->pointer];
    $upcomingIndex = $this->pointer + 1;
    $upcoming      = isset($this->lines[$upcomingIndex]) ? $this->lines[$upcomingIndex] : '';

    $this->line        = $current;
    $this->trimmedLine = \trim($current);
    $this->indent      = \strlen($current) - \strlen(\ltrim($current));

    $this->nextLine        = $upcoming;
    $this->trimmedNextLine = \trim($upcoming);
    $this->nextIndent      = \strlen($upcoming) - \strlen(\ltrim($upcoming));
}
||
| 150 | |||
/**
 * Restore properties to the default values declared on this class.
 *
 * @param bool $all When true every property is restored; otherwise the
 *                  lines, pointer, markup, indentStr and indentLen
 *                  properties keep their current values.
 *
 * @return void
 */
protected function reset($all = \false)
{
    $keep = [];

    if (!$all) {
        // Flipped so the property names become keys for a cheap isset() lookup.
        $keep = \array_flip(['lines', 'pointer', 'markup', 'indentStr', 'indentLen']);
    }

    $defaults = \get_class_vars(__CLASS__);

    foreach ($defaults as $name => $default) {
        if (isset($keep[$name])) {
            continue;
        }

        $this->{$name} = $default;
    }
}
||
| 160 | |||
| 161 | protected function flush() |
||
| 184 | } |
||
| 185 | |||
/**
 * Pass a raw HTML line through to the markup untouched.
 *
 * A line qualifies when an HTML block is already open or when its trimmed
 * text starts with an HTML tag. An HTML block is opened when such a line
 * follows an empty line.
 *
 * @return bool|null True when the line was emitted as raw HTML, null otherwise.
 */
protected function raw()
{
    $isRaw = $this->inHtml || \preg_match(static::RE_RAW, $this->trimmedLine);

    if (!$isRaw) {
        return;
    }

    $this->markup .= "\n" . $this->line;

    if ($this->inHtml) {
        return \true;
    }

    // Entering HTML mode only when the preceding line is empty.
    if (empty($this->lines[$this->pointer - 1])) {
        $this->inHtml = \true;
    }

    return \true;
}
||
| 197 | |||
/**
 * Detect a blockquote marker on the current line and strip it.
 *
 * The marker is removed from the current line (and its trimmed copy) so
 * the remaining text can be parsed by the other block parsers. A new
 * <blockquote> is opened when no quote is open yet or when the marker is
 * deeper than the current quote level; its closing tag is pushed on the
 * block stack.
 *
 * @return bool|null True when the line carried a quote marker, null otherwise.
 */
protected function quote()
{
    if (!\preg_match(static::RE_MD_QUOTE, $this->line, $found)) {
        return;
    }

    list($marker, $arrows) = $found;

    $this->line        = \substr($this->line, \strlen($marker));
    $this->trimmedLine = \trim($this->line);

    $opensNewLevel = !$this->inQuote || $this->quoteLevel < \strlen($arrows);

    if ($opensNewLevel) {
        $this->markup      .= "\n<blockquote>";
        $this->stackBlock[] = "\n</blockquote>";

        $this->quoteLevel++;
    }

    $this->inQuote = \true;

    return \true;
}
||
| 215 | |||
/**
 * Emit an ATX heading (`# Title` … `###### Title`) for the current line.
 *
 * The heading level is the number of leading `#` characters; lines with
 * seven or more are not treated as headings.
 *
 * @return bool|null True when a heading was emitted, null otherwise.
 */
protected function atx()
{
    $text = $this->trimmedLine;

    if (!isset($text[0]) || $text[0] !== '#') {
        return;
    }

    $level = \strlen($text) - \strlen(\ltrim($text, '#'));

    if ($level >= 7) {
        return;
    }

    $title = \ltrim(\ltrim($text, '# '));

    $this->markup .= "\n<h{$level}>" . $title . "</h{$level}>";

    return \true;
}
||
| 228 | |||
/**
 * Emit a setext heading when the next line is an `===` or `---` underline.
 *
 * An underline consisting only of dashes gives <h2>, anything else <h1>.
 * The pointer is advanced so the underline itself is skipped.
 *
 * @return bool|null True when a heading was emitted, null otherwise.
 */
protected function setext()
{
    if (!\preg_match(static::RE_MD_SETEXT, $this->nextLine)) {
        return;
    }

    $onlyDashes = \trim($this->nextLine, '- ') === '';
    $level      = $onlyDashes ? 2 : 1;

    $this->markup .= "\n<h{$level}>{$this->trimmedLine}</h{$level}>";

    // Consume the underline.
    $this->pointer++;

    return \true;
}
||
| 241 | |||
| 242 | protected function code() |
||
| 265 | } |
||
| 266 | } |
||
| 267 | |||
/**
 * Append the lines of a code block to the markup, advancing the pointer.
 *
 * Each upcoming line is escaped and consumed while it either belongs to a
 * fenced block (fenced mode and the line does not start with ```) or
 * starts with one indentation step. Fenced lines are emitted as they are;
 * indented lines have one indentation step removed.
 *
 * @param mixed $codeBlock Truthy when inside a fenced code block, falsy for an indented one.
 *
 * @return void
 */
public function codeInternal($codeBlock)
{
    for ($next = $this->pointer + 1; isset($this->lines[$next]); $next = $this->pointer + 1) {
        $this->line = $this->escape($this->lines[$next]);

        $insideFence = $codeBlock && \substr(\ltrim($this->line), 0, 3) !== '```';

        if (!$insideFence && \strpos($this->line, $this->indentStr) !== 0) {
            break;
        }

        // @todo: donot use \n for first line
        $this->markup .= "\n" . ($codeBlock ? $this->line : \substr($this->line, $this->indentLen));

        $this->pointer++;
    }
}
||
| 285 | |||
/**
 * Emit a horizontal rule for a `___`, `***` or `---` line that follows an empty line.
 *
 * @return bool|null True when an <hr /> was emitted, null otherwise.
 */
protected function rule()
{
    if ($this->trimmedPrevLine !== '') {
        return;
    }

    if (!\preg_match(static::RE_MD_RULE, $this->trimmedLine)) {
        return;
    }

    $this->markup .= "\n<hr />";

    return \true;
}
||
| 296 | |||
/**
 * Emit a list item for the current line, opening the list wrapper if needed.
 *
 * A line is an unordered item when it starts with `- `, `* ` or `+ ` and
 * an ordered item when it starts with digits, a dot and a space. When no
 * list is open yet the matching <ul>/<ol> is opened and its closing tag
 * is pushed on the list stack. Closing the item, and any nesting implied
 * by the next line, is delegated to listInternal().
 *
 * @return bool|null True when the line was a list item, null otherwise.
 */
protected function listt()
{
    $isUl = \in_array(\substr($this->trimmedLine, 0, 2), ['- ', '* ', '+ ']);

    if ($isUl || \preg_match(static::RE_MD_OL, $this->trimmedLine)) {
        $wrapper = $isUl ? 'ul' : 'ol';

        if (!$this->inList) {
            $this->stackList[] = "</$wrapper>";
            $this->markup     .= "\n<$wrapper>\n";
            $this->inList      = \true;

            $this->listLevel++;
        }

        // Remove only the marker itself. A character-mask ltrim would also
        // eat item text that starts with digits, dots or marker characters
        // (e.g. "* **bold**" or "1. 2019 report").
        $item = $isUl
            ? \substr($this->trimmedLine, 2)
            : \preg_replace(static::RE_MD_OL, '', $this->trimmedLine, 1);

        $this->markup .= '<li>' . \ltrim($item, ' ');

        $this->listInternal();

        return \true;
    }
}
||
| 319 | |||
/**
 * Close the current list item, or open/close nested lists, based on the next line.
 *
 * - Next line is not a list item: the current item is simply closed.
 * - Next item is indented deeper: a nested list is opened inside the
 *   current item; the closing tags for the item and the nested wrapper
 *   are pushed on the list stack.
 * - Otherwise the current item is closed, and when the next item is
 *   indented less, closing markup is popped from the list stack once per
 *   indentation step (twice per step while the list level is above 2).
 *
 * @return void
 */
protected function listInternal()
{
    $nextIsUl   = \in_array(\substr($this->trimmedNextLine, 0, 2), ['- ', '* ', '+ ']);
    $nextIsItem = $nextIsUl || \preg_match(static::RE_MD_OL, $this->trimmedNextLine);

    if (!$nextIsItem) {
        $this->markup .= "</li>\n";

        return;
    }

    $wrapper = $nextIsUl ? 'ul' : 'ol';

    if ($this->nextIndent > $this->indent) {
        // Deeper item: keep the <li> open and start a nested list in it.
        $this->stackList[] = "</li>\n";
        $this->stackList[] = "</$wrapper>";
        $this->markup     .= "\n<$wrapper>\n";

        $this->listLevel++;
    } else {
        $this->markup .= "</li>\n";
    }

    if ($this->nextIndent >= $this->indent) {
        return;
    }

    // Shallower item: unwind one stack entry (or two) per indentation step.
    $steps = \intval(($this->indent - $this->nextIndent) / $this->indentLen);

    for ($step = 0; $step < $steps; $step++) {
        $this->markup .= \array_pop($this->stackList);

        if ($this->listLevel > 2) {
            $this->markup .= \array_pop($this->stackList);
        }
    }
}
||
| 351 | |||
/**
 * Handle the current line as part of a pipe-delimited table.
 *
 * Outside a table the number of column separators in the line is counted
 * and header detection is delegated to tableInternal(). Inside a table
 * the line is emitted as a body row, limited to as many cells as the
 * header row had. When the next line is empty or has no column
 * separator, the table is finished and its closing markup is pushed on
 * the table stack.
 *
 * The header separator count is kept in a function-level static, so it
 * persists between calls.
 *
 * @return bool|null True when the line was consumed as a table row or header, null otherwise.
 */
protected function table()
{
    static $headerCount = 0;

    if (!$this->inTable) {
        $headerCount = \substr_count(\trim($this->trimmedLine, '|'), '|');

        return $this->tableInternal($headerCount);
    }

    // Cells beyond the header's column count are dropped.
    $cells = \array_slice(\explode('|', \trim($this->trimmedLine, '|')), 0, $headerCount + 1);

    $this->markup .= "<tr>\n";

    foreach ($cells as $cell) {
        $this->markup .= '<td>' . \trim($cell) . "</td>\n";
    }

    $this->markup .= "</tr>\n";

    $nextIsRow = !empty($this->trimmedNextLine)
        && \substr_count(\trim($this->trimmedNextLine, '|'), '|') > 0;

    if (!$nextIsRow) {
        $headerCount        = 0;
        $this->inTable      = \false;
        $this->stackTable[] = "</tbody>\n</table>";
    }

    return \true;
}
||
| 385 | |||
/**
 * Open a table when the current line is a header row followed by a delimiter row.
 *
 * The next line is matched against the column delimiter pattern; a table
 * is started only if the header has at least one column separator and no
 * more separators than the delimiter row has columns. The delimiter row
 * is skipped, the header cells are emitted inside <thead>, and <tbody>
 * is left open for the rows that follow.
 *
 * @param int $headerCount Number of `|` separators found in the header line.
 *
 * @return bool|null True when a table was opened, null otherwise.
 */
protected function tableInternal($headerCount)
{
    $columnCount = \preg_match_all(static::RE_MD_TCOL, \trim($this->trimmedNextLine, '|'));

    if (!($headerCount > 0 && $headerCount <= $columnCount)) {
        return;
    }

    // Skip the delimiter row.
    $this->pointer++;

    $this->inTable     = \true;
    $this->trimmedLine = \trim($this->trimmedLine, '|');

    $headerCells = '';

    foreach (\explode('|', $this->trimmedLine) as $heading) {
        $headerCells .= '<th>' . \trim($heading) . "</th>\n";
    }

    $this->markup .= "<table>\n<thead>\n<tr>\n" . $headerCells . "</tr>\n</thead>\n<tbody>\n";

    return \true;
}
||
| 406 | |||
| 407 | protected function paragraph() |
||
| 417 | } |
||
| 418 | } |
||
| 419 | } |
||
| 420 |