o2system /
html
| 1 | <?php |
||
| 2 | /** |
||
| 3 | * This file is part of the O2System Framework package. |
||
| 4 | * |
||
| 5 | * For the full copyright and license information, please view the LICENSE |
||
| 6 | * file that was distributed with this source code. |
||
| 7 | * |
||
| 8 | * @author Steeve Andrian Salim |
||
| 9 | * @copyright Copyright (c) Steeve Andrian Salim |
||
| 10 | */ |
||
| 11 | |||
| 12 | // ------------------------------------------------------------------------ |
||
| 13 | |||
| 14 | namespace O2System\Html\Dom; |
||
| 15 | |||
| 16 | // ------------------------------------------------------------------------ |
||
| 17 | |||
| 18 | /** |
||
| 19 | * Class Beautifier |
||
| 20 | * |
||
| 21 | * @package O2System\HTML\DOM |
||
| 22 | */ |
||
class Beautifier
{
    /**
     * Beautifier::ELEMENT_TYPE_BLOCK
     *
     * Marks an element as block-level: it is indented on its own line(s).
     *
     * @var int
     */
    const ELEMENT_TYPE_BLOCK = 0;

    /**
     * Beautifier::ELEMENT_TYPE_INLINE
     *
     * Marks an element as inline: it stays inside the surrounding text.
     *
     * @var int
     */
    const ELEMENT_TYPE_INLINE = 1;

    /**
     * Beautifier::MATCH_INDENT_NO
     *
     * Token is printed at the current depth and leaves the depth unchanged.
     *
     * @var int
     */
    const MATCH_INDENT_NO = 0;

    /**
     * Beautifier::MATCH_INDENT_DECREASE
     *
     * Token lowers the depth by one and is itself printed at the lowered depth.
     *
     * @var int
     */
    const MATCH_INDENT_DECREASE = 1;

    /**
     * Beautifier::MATCH_INDENT_INCREASE
     *
     * Token is printed at the current depth; what follows is one level deeper.
     *
     * @var int
     */
    const MATCH_INDENT_INCREASE = 2;

    /**
     * Beautifier::MATCH_DISCARD
     *
     * Token is consumed without producing any output.
     *
     * @var int
     */
    const MATCH_DISCARD = 3;

    /**
     * Beautifier::$indentCharacter
     *
     * String repeated once per nesting level in front of every output line.
     *
     * NOTE(review): the value is reproduced exactly as it appears in the reviewed
     * listing (one space). If the canonical file uses a wider indent and the
     * whitespace was collapsed in transit, restore the canonical value.
     *
     * @var string
     */
    private $indentCharacter = ' ';

    /**
     * Beautifier::$inlineElements
     *
     * Names of the elements that are kept inline. An inline element that holds
     * only text is shielded from the tokenizer in format().
     *
     * @var array
     */
    private $inlineElements = [
        'b',
        'big',
        'i',
        'small',
        'tt',
        'abbr',
        'acronym',
        'cite',
        'code',
        'dfn',
        'em',
        'kbd',
        'strong',
        'samp',
        'var',
        'a',
        'bdo',
        'br',
        'img',
        'span',
        'sub',
        'sup',
    ];

    // ------------------------------------------------------------------------

    /**
     * Beautifier::setElementType
     *
     * Adds an element name to, or removes it from, the inline element list.
     * Any $type other than the two ELEMENT_TYPE_* constants leaves the
     * membership untouched.
     *
     * @param string $elementName Tag name without angle brackets, e.g. 'span'.
     * @param int    $type        Beautifier::ELEMENT_TYPE_BLOCK | Beautifier::ELEMENT_TYPE_INLINE
     */
    public function setElementType($elementName, $type)
    {
        if ($type === static::ELEMENT_TYPE_BLOCK) {
            $this->inlineElements = array_diff($this->inlineElements, [$elementName]);
        } elseif ($type === static::ELEMENT_TYPE_INLINE) {
            $this->inlineElements[] = $elementName;
        }

        // array_diff() keeps the old keys and the append may duplicate a name:
        // normalise to a gap-free list of unique names (a no-op on an empty list).
        $this->inlineElements = array_values(array_unique($this->inlineElements));
    }

    // ------------------------------------------------------------------------

    /**
     * Beautifier::format
     *
     * Re-indents an HTML fragment, one token per output line:
     *
     *  1. every <script> element is swapped for a numbered placeholder so its
     *     body is never reformatted;
     *  2. tabs and runs of whitespace are collapsed to a single space (only
     *     <script> is shielded from this; <pre> content is collapsed as well);
     *  3. inline elements that contain only text are swapped for placeholders
     *     so they stay inside their text node;
     *  4. the remaining markup is consumed token by token and written with
     *     the indentation that matches its nesting depth;
     *  5. an element left with nothing but whitespace between its tags is
     *     joined back onto one line and all placeholders are restored.
     *
     * @param string $source HTML markup to format.
     *
     * @return string The indented markup without leading/trailing whitespace.
     */
    public function format($source)
    {
        $source = (string) $source;

        // 1. Shield <script> elements.
        $scripts = [];
        $source = $this->shelve(
            '/<script\b[^>]*>([\s\S]*?)<\/script>/mi',
            $source,
            '<script>',
            '</script>',
            $scripts
        );

        // 2. A tab is whitespace, not nothing: deleting it would glue "a\tb" into "ab".
        $source = str_replace("\t", ' ', $source);
        $collapsed = preg_replace('/\s{2,}/', ' ', $source);
        if ($collapsed !== null) {
            $source = $collapsed;
        }

        // 3. Shield text-only inline elements.
        $inlines = [];
        if (! empty($this->inlineElements)) {
            $names = array_map(
                function ($name) {
                    return preg_quote($name, '/');
                },
                $this->inlineElements
            );

            // The look-ahead ends the tag name, so "b" cannot match the start of "<br>".
            $source = $this->shelve(
                '/<(' . implode('|', $names) . ')(?=[\s\/>])[^>]*>(?:[^<]*)<\/\1>/',
                $source,
                'ᐃ',
                'ᐃ',
                $inlines
            );
        }

        // 4. Tokenize. Rules are tried in order and every one is anchored to the
        //    start of the remaining source, so a match is always a prefix of it.
        $rules = [
            // element that holds only text: kept on a single line
            '/^(<([a-z]+)(?:[^>]*)>(?:[^<]*)<\/(?:\2)>)/' => static::MATCH_INDENT_NO,
            // DOCTYPE, comments and other "<!" declarations
            '/^<!([^>]*)>/'                                => static::MATCH_INDENT_NO,
            // tag with implied closing
            '/^<(input|link|meta|base|br|img|hr)([^>]*)>/' => static::MATCH_INDENT_NO,
            // explicit self-closing tag; must precede the opening-tag rule,
            // which would otherwise claim it and deepen the indent for good
            '/^<[^\/>][^>]*\/>/'                           => static::MATCH_INDENT_NO,
            // opening tag
            '/^<[^\/]([^>]*)>/'                            => static::MATCH_INDENT_INCREASE,
            // closing tag
            '/^<\/([^>]*)>/'                               => static::MATCH_INDENT_DECREASE,
            // whitespace between tokens
            '/^(\s+)/'                                     => static::MATCH_DISCARD,
            // text node
            '/^([^<]+)/'                                   => static::MATCH_INDENT_NO,
            // stray "<" that never forms a tag: emit it as text instead of losing it
            '/^<[^<]*/'                                    => static::MATCH_INDENT_NO,
        ];

        $output = '';
        $depth = 0;

        while ($source !== '') {
            $consumed = false;

            foreach ($rules as $pattern => $action) {
                if (preg_match($pattern, $source, $token) !== 1) {
                    continue;
                }

                $consumed = true;

                // preg_* works on bytes here (no /u flag), so cut by byte length.
                $source = (string) substr($source, strlen($token[0]));

                if ($action === static::MATCH_DISCARD) {
                    break;
                }

                $lineDepth = $depth;

                if ($action === static::MATCH_INDENT_DECREASE) {
                    // Never below zero: an unmatched closing tag must not
                    // under-indent everything that follows it.
                    $depth = max(0, $depth - 1);
                    $lineDepth = $depth;
                } elseif ($action === static::MATCH_INDENT_INCREASE) {
                    $depth++;
                }

                $output .= str_repeat($this->indentCharacter, $lineDepth) . $token[0] . "\n";

                break;
            }

            if (! $consumed) {
                // Only reachable when PCRE itself fails; stop instead of spinning.
                break;
            }
        }

        // 5. Join "<tag>" and "</tag>" when only whitespace separates them.
        $joined = preg_replace('/(<(\w+)[^>]*>)\s*(<\/\2>)/', '\\1\\3', $output);
        if ($joined !== null) {
            $output = $joined;
        }

        // Inline elements first, scripts last, so a restored script body is
        // never scanned for inline placeholders.
        $output = $this->unshelve($output, 'ᐃ', 'ᐃ', $inlines);
        $output = $this->unshelve($output, '<script>', '</script>', $scripts);

        return trim($output);
    }

    // ------------------------------------------------------------------------

    /**
     * Beautifier::shelve
     *
     * Replaces every match of $pattern with "$open<n>$close", where <n> is the
     * 1-based position of the match, and stores the matched text in $shelf.
     *
     * @param string $pattern PCRE pattern selecting the fragments to protect.
     * @param string $source  Markup to scan.
     * @param string $open    Text placed in front of the number.
     * @param string $close   Text placed behind the number.
     * @param array  $shelf   Receives the original fragments, in document order.
     *
     * @return string $source with placeholders; $source unchanged if PCRE fails.
     */
    private function shelve($pattern, $source, $open, $close, array &$shelf)
    {
        $shelf = [];

        $result = preg_replace_callback(
            $pattern,
            function ($found) use (&$shelf, $open, $close) {
                $shelf[] = $found[0];

                return $open . count($shelf) . $close;
            },
            $source
        );

        if ($result === null) {
            // Nothing was replaced, so nothing may be restored later.
            $shelf = [];

            return $source;
        }

        return $result;
    }

    // ------------------------------------------------------------------------

    /**
     * Beautifier::unshelve
     *
     * Puts the fragments stored by shelve() back in a single left-to-right
     * pass, so the delimiters of neighbouring placeholders cannot be paired up
     * by mistake. A number without a stored fragment is left as it is.
     *
     * @param string $output Text that contains the placeholders.
     * @param string $open   Text in front of the placeholder number.
     * @param string $close  Text behind the placeholder number.
     * @param array  $shelf  Fragments collected by shelve().
     *
     * @return string
     */
    private function unshelve($output, $open, $close, array $shelf)
    {
        if (empty($shelf)) {
            return $output;
        }

        $result = preg_replace_callback(
            '/' . preg_quote($open, '/') . '(\d+)' . preg_quote($close, '/') . '/',
            function ($found) use ($shelf) {
                $index = (int) $found[1] - 1;

                return isset($shelf[$index]) ? $shelf[$index] : $found[0];
            },
            $output
        );

        return $result === null ? $output : $result;
    }
}
This check marks implicit conversions of arrays to boolean values in a comparison. While in PHP an empty array is considered to be equal (but not identical) to false, this is not always apparent.
Consider making the comparison explicit by using `empty(...)` or `! empty(...)` instead.