@@ -13,519 +13,519 @@ |
||
| 13 | 13 | class CleanHtmlService implements SingletonInterface |
| 14 | 14 | { |
| 15 | 15 | |
| 16 | - /** |
|
| 17 | - * Enable Debug comment in footer |
|
| 18 | - * |
|
| 19 | - * @var boolean |
|
| 20 | - */ |
|
| 21 | - protected $debugComment = false; |
|
| 22 | - |
|
| 23 | - /** |
|
| 24 | - * Format Type |
|
| 25 | - * |
|
| 26 | - * @var integer |
|
| 27 | - */ |
|
| 28 | - protected $formatType = 2; |
|
| 29 | - |
|
| 30 | - /** |
|
| 31 | - * Tab character |
|
| 32 | - * |
|
| 33 | - * @var string |
|
| 34 | - */ |
|
| 35 | - protected $tab = "\t"; |
|
| 36 | - |
|
| 37 | - /** |
|
| 38 | - * Newline character |
|
| 39 | - * |
|
| 40 | - * @var string |
|
| 41 | - */ |
|
| 42 | - protected $newline = "\n"; |
|
| 43 | - |
|
| 44 | - /** |
|
| 45 | - * Enable/disable UTF8 support |
|
| 46 | - * |
|
| 47 | - * @var boolean |
|
| 48 | - */ |
|
| 49 | - protected $utf8 = true; |
|
| 50 | - |
|
| 51 | - /** |
|
| 52 | - * Configured extra header comment |
|
| 53 | - * |
|
| 54 | - * @var string |
|
| 55 | - */ |
|
| 56 | - protected $headerComment = ''; |
|
| 57 | - |
|
/**
 * Configure the service from a TypoScript-style settings array.
 *
 * The newline sequence is always re-derived from the TYPO3_OS constant. Every
 * other property keeps its current value unless a usable setting is present.
 *
 * Recognised settings:
 * - 'formatHtml'               numeric format type (0 switches formatting off)
 * - 'formatHtml.' => 'tabSize' number of spaces used for one indentation step
 * - 'formatHtml.' => 'debugComment' append a note about unbalanced tags
 * - 'enable_utf-8_support'     collapse white space in UTF-8 mode
 * - 'headerComment'            extra comment placed in the document head
 *
 * @param array $config
 *
 * @return void
 */
public function setVariables(array $config)
{
    // "\r\n" on Windows hosts, "\n" everywhere else.
    $this->newline = TYPO3_OS == 'WIN' ? "\r\n" : "\n";

    if (empty($config)) {
        return;
    }

    // isset() rather than a truthiness test: a configured 0 ("off") must be
    // honoured instead of silently falling back to the default format type.
    if (isset($config['formatHtml']) && is_numeric($config['formatHtml'])) {
        $this->formatType = (int)$config['formatHtml'];
    }

    // The sub-array is optional; never index into it blindly.
    $formatOptions = isset($config['formatHtml.']) && is_array($config['formatHtml.'])
        ? $config['formatHtml.']
        : [];

    if (!empty($formatOptions['tabSize']) && is_numeric($formatOptions['tabSize'])) {
        $this->tab = str_pad('', (int)$formatOptions['tabSize'], ' ');
    }

    // Test the same key that is read afterwards (the old guard looked at
    // 'enable_utf', so this setting could never take effect).
    if (isset($config['enable_utf-8_support'])) {
        $this->utf8 = (bool)$config['enable_utf-8_support'];
    }

    if (isset($formatOptions['debugComment'])) {
        $this->debugComment = (bool)$formatOptions['debugComment'];
    }

    if (isset($config['headerComment'])) {
        $this->headerComment = $config['headerComment'];
    }
}
|
| 97 | - |
|
/**
 * Run the configured clean-up steps on the given markup, in place.
 *
 * With a non-empty $config the service only runs when its 'enabled' entry is
 * truthy; the settings are then applied through setVariables(). With an empty
 * $config the current property values are used and no manipulation is run.
 *
 * Order of work: instantiate the requested manipulations, insert the header
 * comment (if one is configured), run the manipulations, then format the
 * markup unless the format type is 0.
 *
 * @param string $html Markup to clean; modified by reference
 * @param array $config
 *
 * @return void
 */
public function clean(&$html, $config = [])
{
    if (!empty($config)) {
        // empty() covers both "missing" and "falsy" without an undefined-index notice.
        if (empty($config['enabled'])) {
            return;
        }

        $this->setVariables($config);
    }

    // Setting name => manipulation class; a truthy setting enables the class.
    $availableManipulations = [
        'removeGenerator' => 'HTML\\Sourceopt\\Manipulation\\RemoveGenerator',
        'removeComments' => 'HTML\\Sourceopt\\Manipulation\\RemoveComments',
        'removeBlurScript' => 'HTML\\Sourceopt\\Manipulation\\RemoveBlurScript',
    ];

    $manipulations = [];
    foreach ($availableManipulations as $key => $className) {
        if (!empty($config[$key])) {
            $manipulations[$key] = GeneralUtility::makeInstance($className);
        }
    }

    if (!empty($this->headerComment)) {
        $this->includeHeaderComment($html);
    }

    foreach ($manipulations as $key => $manipulation) {
        /** @var ManipulationInterface $manipulation */
        // Per-manipulation options live under "<name>." in the settings array.
        $optionsKey = $key . '.';
        $configuration = isset($config[$optionsKey]) && is_array($config[$optionsKey])
            ? $config[$optionsKey]
            : [];
        $html = $manipulation->manipulate($html, $configuration);
    }

    if ($this->formatType) {
        $this->formatHtml($html);
    }
}
|
| 144 | - |
|
/**
 * Re-indent the (X)HTML markup in place.
 *
 * - indents tags according to their nesting depth
 * - collapses white space between tags
 * - removes line breaks inside tags; comments, <pre>, <textarea>, <style> and
 *   <script> blocks are set aside first and only lose trailing white space
 *
 * The line-break density is taken from $this->formatType:
 * 0 => off
 * 1 => no line break at all (code in one line)
 * 2 => minimalistic line breaks (structure defining box-elements)
 * 3 => aesthetic line breaks (important box-elements)
 * 4 => logic line breaks (all box-elements)
 * 5 => max line breaks (handled exactly like 4 by this implementation)
 *
 * @param string $html Markup to format; modified by reference
 *
 * @return void
 */
protected function formatHtml(&$html)
{
    // Kept so the markup can be handed back untouched if tokenising fails.
    $unformattedHtml = $html;

    // Park comments, pre, textarea, style and script blocks behind numbered
    // marker comments so the white space handling below cannot touch them.
    preg_match_all(
        '/(?s)((<!--.*?-->)|(<[ \n\r]*pre[^>]*>.*?<[ \n\r]*\/pre[^>]*>)|(<[ \n\r]*textarea[^>]*>.*?<[ \n\r]*\/textarea[^>]*>)|(<[ \n\r]*style[^>]*>.*?<[ \n\r]*\/style[^>]*>)|(<[ \n\r]*script[^>]*>.*?<[ \n\r]*\/script[^>]*>))/im',
        $html,
        $matches
    );
    $noFormat = $matches[0];
    $noFormatCount = count($noFormat);
    for ($i = 0; $i < $noFormatCount; $i++) {
        $html = str_replace($noFormat[$i], "\n<!-- ELEMENT $i -->", $html);
    }

    // Element groups that decide where line breaks are placed.
    $trueBoxElements = 'address|blockquote|center|dir|div|dl|fieldset|form|h1|h2|h3|h4|h5|h6|hr|isindex|menu|noframes|noscript|ol|p|pre|table|ul|article|aside|details|figcaption|figure|footer|header|hgroup|menu|nav|section';
    $functionalBoxElements = 'dd|dt|frameset|li|tbody|td|tfoot|th|thead|tr|colgroup';
    $usableBoxElements = 'applet|button|del|iframe|ins|map|object|script';
    $imagineBoxElements = 'html|body|head|meta|title|link|script|base|!--';
    $allBoxLikeElements = '(?>' . $trueBoxElements . '|' . $functionalBoxElements . '|' . $usableBoxElements . '|' . $imagineBoxElements . ')';
    $esteticBoxLikeElements = '(?>html|head|body|meta name|title|div|table|h1|h2|h3|h4|h5|h6|p|form|pre|center|!--)';
    $structureBoxLikeElements = '(?>html|head|body|div|!--)';

    // Pick the group for the active format type once instead of on every
    // element; null means "no box-element based line breaks" (types 0 and 1).
    $boxLikeElements = null;
    if ($this->formatType == 2) {
        $boxLikeElements = $structureBoxLikeElements;
    } elseif ($this->formatType == 3) {
        $boxLikeElements = $esteticBoxLikeElements;
    } elseif ($this->formatType >= 4) {
        $boxLikeElements = $allBoxLikeElements;
    }

    // Split the markup into tags and the text between them.
    $htmlArrayTemp = preg_split(
        '/(<(?:[^<>]+(?:"[^"]*"|\'[^\']*\')?)+>)/',
        $html,
        -1,
        PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY
    );
    if ($htmlArrayTemp === false) {
        // PCRE gave up (e.g. backtrack limit): better unformatted than destroyed.
        $html = $unformattedHtml;
        return;
    }

    // Index 0 is an empty sentinel so that every real element has a
    // predecessor; white-space-only chunks shrink to a single blank.
    $htmlArray = [''];
    foreach ($htmlArrayTemp as $chunk) {
        $htmlArray[] = trim($chunk) !== '' ? $chunk : ' ';
    }

    // Tag prefixes that never open a new indentation level: closing tags,
    // "< ", void elements, declarations/comments and the XML prolog.
    // These are plain prefix comparisons, exactly like the substr() checks
    // they replace.
    $noIndentPrefixes = [
        '</', '< ', '<img', '<br', '<hr', '<input', '<link', '<meta', '<col ',
        '<frame', '<isindex', '<param', '<area', '<base', '<!', '<?xml',
    ];

    // Rebuild the markup element by element.
    $html = '';
    $tabs = 0;
    $elementCount = count($htmlArray);
    // Start at 1: the sentinel produces no output and has no predecessor.
    for ($x = 1; $x < $elementCount; $x++) {
        $current = $htmlArray[$x];
        $previous = $htmlArray[$x - 1];

        // Should the current element start on a new line?
        $newline = false;
        if (substr($previous, 0, 5) === '<?xml') {
            $newline = true;
        } elseif ($boxLikeElements !== null) {
            // Either the current element is a box element (any opening or
            // self-closing form matches the first pattern), or the previous
            // element was a closing box tag, a comment or a self-closed box tag.
            $newline = preg_match('/<' . $boxLikeElements . '(.*)>/Usi', $current)
                || preg_match('/<\/' . $boxLikeElements . '(.*)>/Usi', $previous)
                || substr($previous, 0, 4) === '<!--'
                || preg_match('/<' . $boxLikeElements . '(.*) \/>/Usi', $previous);
        }

        // A closing tag belongs one level further out.
        if (substr($current, 0, 2) === '</') {
            $tabs--;
        }

        // Line break plus indentation in front of the current element.
        if ($newline) {
            $html .= $this->newline . str_repeat($this->tab, max(0, $tabs));
        }

        // Append the element with the white space treatment its context needs.
        if (substr($previous, 0, 4) === '<pre'
            || substr($previous, 0, 9) === '<textarea'
            || substr($previous, 0, 4) === '<!--'
        ) {
            // Directly after pre / textarea / comment: only trim line ends.
            $html .= $this->rTrimLines($current);
        } elseif (substr($current, 0, 9) === '<![CDATA['
            || substr($current, 0, 5) === '<?xml'
        ) {
            // CDATA / XML prolog: collapse white space, keep line breaks.
            $html .= $this->killWhiteSpace($current);
        } else {
            $html .= $this->killLineBreaks($current);
        }

        // An opening tag that is not in the no-indent list deepens the nesting.
        if (substr($current, 0, 1) === '<') {
            $opensLevel = true;
            foreach ($noIndentPrefixes as $prefix) {
                if (strncmp($current, $prefix, strlen($prefix)) === 0) {
                    $opensLevel = false;
                    break;
                }
            }
            if ($opensLevel) {
                $tabs++;
            }
        }
    }

    // Remove empty lines
    if ($this->formatType > 1) {
        $this->removeEmptyLines($html);
    }

    // Put the parked blocks back, minus white space at their line ends.
    for ($i = 0; $i < $noFormatCount; $i++) {
        $noFormat[$i] = $this->rTrimLines($noFormat[$i]);
        $html = str_replace("<!-- ELEMENT $i -->", $noFormat[$i], $html);
    }

    // A non-zero depth at the end means opening and closing tags did not balance.
    if ($tabs != 0 && $this->debugComment === true) {
        $html .= '<!--' . $tabs . " open elements found-->\r\n";
    }
}
|
| 341 | - |
|
/**
 * Remove ALL line breaks and collapse runs of white space to a single blank.
 *
 * In UTF-8 mode the collapse uses the /u modifier. If the text turns out not
 * to be valid UTF-8, PCRE reports an error and preg_replace() returns null;
 * the byte-wise pattern is then used instead so the fragment is not lost.
 *
 * @param string $html
 *
 * @return string
 */
protected function killLineBreaks($html)
{
    $html = $this->convNlOs($html);
    $html = str_replace($this->newline, '', $html);

    if ($this->utf8) {
        $collapsed = preg_replace('/\s\s+/u', ' ', $html);
        if ($collapsed !== null) {
            return $collapsed;
        }
        // Invalid UTF-8: fall through to the byte-wise variant below.
    }

    return preg_replace('/\s\s+/', ' ', $html);
}
|
| 361 | - |
|
/**
 * Collapse runs of white space inside each line while keeping the line breaks.
 *
 * Every line is trimmed, lines that are empty after trimming are dropped, and
 * the remaining lines are joined with the configured newline sequence.
 *
 * @param string $html
 *
 * @return string
 */
protected function killWhiteSpace($html)
{
    $html = $this->convNlOs($html);

    // Collect the surviving lines in a fresh list. Removing entries from the
    // array that is being counted would shorten the loop and leave the last
    // lines unprocessed.
    $lines = [];
    foreach (explode($this->newline, $html) as $line) {
        $line = trim($line);
        // Strict comparison: a line consisting of "0" is content, not empty.
        if ($line === '') {
            continue;
        }
        $lines[] = preg_replace('/\s\s+/', ' ', $line);
    }

    return implode($this->newline, $lines);
}
|
| 384 | - |
|
/**
 * Strip trailing white space from every line.
 *
 * Leading and inner white space as well as the line breaks themselves are
 * kept; line breaks are normalised to the configured newline sequence.
 *
 * @param string $html
 *
 * @return string
 */
protected function rTrimLines($html)
{
    $normalized = $this->convNlOs($html);
    $lines = array_map('rtrim', explode($this->newline, $normalized));

    return implode($this->newline, $lines);
}
|
| 402 | - |
|
/**
 * Replace every line break ("\r\n", "\n" or "\r") with the configured
 * newline sequence.
 *
 * @param string $html
 *
 * @return string
 */
protected function convNlOs($html)
{
    // The surrounding parentheses act as the PCRE delimiters of this pattern.
    return preg_replace("(\r\n|\n|\r)", $this->newline, $html);
}
|
| 415 | - |
|
/**
 * Strip tabs everywhere and white space at both ends of every line, and
 * convert line breaks according to the TYPO3_OS constant.
 *
 * @param string $html Html-Code; modified by reference
 *
 * @return void
 */
protected function trimLines(&$html)
{
    $html = str_replace("\t", "", $html);

    // Line break conversion depends on the host OS reported by TYPO3.
    $html = TYPO3_OS == "WIN"
        ? str_replace("\n", "\r\n", $html)
        : str_replace("\r\n", "\n", $html);

    $lines = array_map('trim', explode($this->newline, $html));
    $html = implode($this->newline, $lines);
}
|
| 437 | - |
|
/**
 * Drop every line that is empty or contains nothing but white space.
 *
 * @param string $html Markup to compact; modified by reference
 *
 * @return void
 */
protected function removeEmptyLines(&$html)
{
    $kept = [];
    foreach (explode($this->newline, $html) as $line) {
        if (trim($line) !== '') {
            $kept[] = $line;
        }
    }

    $html = implode($this->newline, $kept);
}
|
| 457 | - |
|
/**
 * Remove line breaks where they are not needed.
 *
 * Complete textarea and pre elements are passed through unchanged; all other
 * markup is run through killLineBreaks().
 *
 * @param string $html Markup to compact; modified by reference
 *
 * @return void
 */
protected function removeNewLines(&$html)
{
    // possibly also: span, script, style
    $protectedTags = [
        'textarea',
        'pre'
    ];
    $pattern = '#(<(' . implode('|', $protectedTags) . ').*>.*</\2>)#Uis';

    // With both capture groups returned, the pieces repeat in groups of three:
    // surrounding markup, complete protected element, captured tag name.
    $pieces = preg_split($pattern, $html, -1, PREG_SPLIT_DELIM_CAPTURE);

    $html = "";
    foreach ($pieces as $index => $piece) {
        switch ($index % 3) {
            case 0:
                // markup outside protected elements
                $html .= $this->killLineBreaks($piece);
                break;
            case 1:
                // protected element, keep verbatim
                $html .= $piece;
                break;
            default:
                // bare tag name from the inner capture group: not part of the output
                break;
        }
    }
}
|
| 481 | - |
|
/**
 * Strip <link rel="schema.dc" ...> tags (quotes around the rel value are
 * optional, matching is case-insensitive).
 *
 * @param string $html Markup to clean; modified by reference
 *
 * @return void
 */
protected function removeLinkSchema(&$html)
{
    $schemaLinkPattern = "/<link rel=\"?schema.dc\"?.+?>/is";
    $html = preg_replace($schemaLinkPattern, "", $html);
}
|
| 493 | - |
|
/**
 * Delete every literal occurrence of an empty alt attribute (alt="").
 *
 * @param string $html Markup to clean; modified by reference
 *
 * @return void
 */
protected function removeEmptyAltAtr(&$html)
{
    $emptyAlt = "alt=\"\"";
    $html = str_replace($emptyAlt, "", $html);
}
|
| 505 | - |
|
/**
 * Turn broken root links of the form href=".html" into href="".
 *
 * @param string $html Markup to clean; modified by reference
 *
 * @return void
 */
protected function removeRealUrlBrokenRootLink(&$html)
{
    $brokenRootLink = 'href=".html"';
    $emptyLink = 'href=""';
    $html = str_replace($brokenRootLink, $emptyLink, $html);
}
|
| 517 | - |
|
/**
 * Insert the configured header comment after the first
 * <meta http-equiv ...> tag of the markup.
 *
 * Nothing happens when no header comment is configured.
 *
 * @param string $html Markup to extend; modified by reference
 *
 * @return void
 */
public function includeHeaderComment(&$html)
{
    if (empty($this->headerComment)) {
        return;
    }

    $html = preg_replace_callback(
        '/<meta http-equiv(.*)>/Usi',
        function ($matches) {
            // Matched meta tag, line break, two indentation steps, then the comment.
            $comment = '<!-- ' . $this->headerComment . '-->';

            return trim($matches[0] . $this->newline . $this->tab . $this->tab . $comment);
        },
        $html,
        1 // only the first meta tag receives the comment
    );
}
|
| 16 | + /** |
|
| 17 | + * Enable Debug comment in footer |
|
| 18 | + * |
|
| 19 | + * @var boolean |
|
| 20 | + */ |
|
| 21 | + protected $debugComment = false; |
|
| 22 | + |
|
| 23 | + /** |
|
| 24 | + * Format Type |
|
| 25 | + * |
|
| 26 | + * @var integer |
|
| 27 | + */ |
|
| 28 | + protected $formatType = 2; |
|
| 29 | + |
|
| 30 | + /** |
|
| 31 | + * Tab character |
|
| 32 | + * |
|
| 33 | + * @var string |
|
| 34 | + */ |
|
| 35 | + protected $tab = "\t"; |
|
| 36 | + |
|
| 37 | + /** |
|
| 38 | + * Newline character |
|
| 39 | + * |
|
| 40 | + * @var string |
|
| 41 | + */ |
|
| 42 | + protected $newline = "\n"; |
|
| 43 | + |
|
| 44 | + /** |
|
| 45 | + * Enable/disable UTF8 support |
|
| 46 | + * |
|
| 47 | + * @var boolean |
|
| 48 | + */ |
|
| 49 | + protected $utf8 = true; |
|
| 50 | + |
|
| 51 | + /** |
|
| 52 | + * Configured extra header comment |
|
| 53 | + * |
|
| 54 | + * @var string |
|
| 55 | + */ |
|
| 56 | + protected $headerComment = ''; |
|
| 57 | + |
|
/**
 * Configure the service from a TypoScript-style settings array.
 *
 * The newline sequence is always re-derived from the TYPO3_OS constant. Every
 * other property keeps its current value unless a usable setting is present.
 *
 * Recognised settings:
 * - 'formatHtml'               numeric format type (0 switches formatting off)
 * - 'formatHtml.' => 'tabSize' number of spaces used for one indentation step
 * - 'formatHtml.' => 'debugComment' append a note about unbalanced tags
 * - 'enable_utf-8_support'     collapse white space in UTF-8 mode
 * - 'headerComment'            extra comment placed in the document head
 *
 * @param array $config
 *
 * @return void
 */
public function setVariables(array $config)
{
    // "\r\n" on Windows hosts, "\n" everywhere else.
    $this->newline = TYPO3_OS == 'WIN' ? "\r\n" : "\n";

    if (empty($config)) {
        return;
    }

    // isset() rather than a truthiness test: a configured 0 ("off") must be
    // honoured instead of silently falling back to the default format type.
    if (isset($config['formatHtml']) && is_numeric($config['formatHtml'])) {
        $this->formatType = (int)$config['formatHtml'];
    }

    // The sub-array is optional; never index into it blindly.
    $formatOptions = isset($config['formatHtml.']) && is_array($config['formatHtml.'])
        ? $config['formatHtml.']
        : [];

    if (!empty($formatOptions['tabSize']) && is_numeric($formatOptions['tabSize'])) {
        $this->tab = str_pad('', (int)$formatOptions['tabSize'], ' ');
    }

    // Test the same key that is read afterwards (the old guard looked at
    // 'enable_utf', so this setting could never take effect).
    if (isset($config['enable_utf-8_support'])) {
        $this->utf8 = (bool)$config['enable_utf-8_support'];
    }

    if (isset($formatOptions['debugComment'])) {
        $this->debugComment = (bool)$formatOptions['debugComment'];
    }

    if (isset($config['headerComment'])) {
        $this->headerComment = $config['headerComment'];
    }
}
|
| 97 | + |
|
/**
 * Run the configured clean-up steps on the given markup, in place.
 *
 * With a non-empty $config the service only runs when its 'enabled' entry is
 * truthy; the settings are then applied through setVariables(). With an empty
 * $config the current property values are used and no manipulation is run.
 *
 * Order of work: instantiate the requested manipulations, insert the header
 * comment (if one is configured), run the manipulations, then format the
 * markup unless the format type is 0.
 *
 * @param string $html Markup to clean; modified by reference
 * @param array $config
 *
 * @return void
 */
public function clean(&$html, $config = [])
{
    if (!empty($config)) {
        // empty() covers both "missing" and "falsy" without an undefined-index notice.
        if (empty($config['enabled'])) {
            return;
        }

        $this->setVariables($config);
    }

    // Setting name => manipulation class; a truthy setting enables the class.
    $availableManipulations = [
        'removeGenerator' => 'HTML\\Sourceopt\\Manipulation\\RemoveGenerator',
        'removeComments' => 'HTML\\Sourceopt\\Manipulation\\RemoveComments',
        'removeBlurScript' => 'HTML\\Sourceopt\\Manipulation\\RemoveBlurScript',
    ];

    $manipulations = [];
    foreach ($availableManipulations as $key => $className) {
        if (!empty($config[$key])) {
            $manipulations[$key] = GeneralUtility::makeInstance($className);
        }
    }

    if (!empty($this->headerComment)) {
        $this->includeHeaderComment($html);
    }

    foreach ($manipulations as $key => $manipulation) {
        /** @var ManipulationInterface $manipulation */
        // Per-manipulation options live under "<name>." in the settings array.
        $optionsKey = $key . '.';
        $configuration = isset($config[$optionsKey]) && is_array($config[$optionsKey])
            ? $config[$optionsKey]
            : [];
        $html = $manipulation->manipulate($html, $configuration);
    }

    if ($this->formatType) {
        $this->formatHtml($html);
    }
}
|
| 144 | + |
|
/**
 * Re-indent the (X)HTML markup in place.
 *
 * - indents tags according to their nesting depth
 * - collapses white space between tags
 * - removes line breaks inside tags; comments, <pre>, <textarea>, <style> and
 *   <script> blocks are set aside first and only lose trailing white space
 *
 * The line-break density is taken from $this->formatType:
 * 0 => off
 * 1 => no line break at all (code in one line)
 * 2 => minimalistic line breaks (structure defining box-elements)
 * 3 => aesthetic line breaks (important box-elements)
 * 4 => logic line breaks (all box-elements)
 * 5 => max line breaks (handled exactly like 4 by this implementation)
 *
 * @param string $html Markup to format; modified by reference
 *
 * @return void
 */
protected function formatHtml(&$html)
{
    // Kept so the markup can be handed back untouched if tokenising fails.
    $unformattedHtml = $html;

    // Park comments, pre, textarea, style and script blocks behind numbered
    // marker comments so the white space handling below cannot touch them.
    preg_match_all(
        '/(?s)((<!--.*?-->)|(<[ \n\r]*pre[^>]*>.*?<[ \n\r]*\/pre[^>]*>)|(<[ \n\r]*textarea[^>]*>.*?<[ \n\r]*\/textarea[^>]*>)|(<[ \n\r]*style[^>]*>.*?<[ \n\r]*\/style[^>]*>)|(<[ \n\r]*script[^>]*>.*?<[ \n\r]*\/script[^>]*>))/im',
        $html,
        $matches
    );
    $noFormat = $matches[0];
    $noFormatCount = count($noFormat);
    for ($i = 0; $i < $noFormatCount; $i++) {
        $html = str_replace($noFormat[$i], "\n<!-- ELEMENT $i -->", $html);
    }

    // Element groups that decide where line breaks are placed.
    $trueBoxElements = 'address|blockquote|center|dir|div|dl|fieldset|form|h1|h2|h3|h4|h5|h6|hr|isindex|menu|noframes|noscript|ol|p|pre|table|ul|article|aside|details|figcaption|figure|footer|header|hgroup|menu|nav|section';
    $functionalBoxElements = 'dd|dt|frameset|li|tbody|td|tfoot|th|thead|tr|colgroup';
    $usableBoxElements = 'applet|button|del|iframe|ins|map|object|script';
    $imagineBoxElements = 'html|body|head|meta|title|link|script|base|!--';
    $allBoxLikeElements = '(?>' . $trueBoxElements . '|' . $functionalBoxElements . '|' . $usableBoxElements . '|' . $imagineBoxElements . ')';
    $esteticBoxLikeElements = '(?>html|head|body|meta name|title|div|table|h1|h2|h3|h4|h5|h6|p|form|pre|center|!--)';
    $structureBoxLikeElements = '(?>html|head|body|div|!--)';

    // Pick the group for the active format type once instead of on every
    // element; null means "no box-element based line breaks" (types 0 and 1).
    $boxLikeElements = null;
    if ($this->formatType == 2) {
        $boxLikeElements = $structureBoxLikeElements;
    } elseif ($this->formatType == 3) {
        $boxLikeElements = $esteticBoxLikeElements;
    } elseif ($this->formatType >= 4) {
        $boxLikeElements = $allBoxLikeElements;
    }

    // Split the markup into tags and the text between them.
    $htmlArrayTemp = preg_split(
        '/(<(?:[^<>]+(?:"[^"]*"|\'[^\']*\')?)+>)/',
        $html,
        -1,
        PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY
    );
    if ($htmlArrayTemp === false) {
        // PCRE gave up (e.g. backtrack limit): better unformatted than destroyed.
        $html = $unformattedHtml;
        return;
    }

    // Index 0 is an empty sentinel so that every real element has a
    // predecessor; white-space-only chunks shrink to a single blank.
    $htmlArray = [''];
    foreach ($htmlArrayTemp as $chunk) {
        $htmlArray[] = trim($chunk) !== '' ? $chunk : ' ';
    }

    // Tag prefixes that never open a new indentation level: closing tags,
    // "< ", void elements, declarations/comments and the XML prolog.
    // These are plain prefix comparisons, exactly like the substr() checks
    // they replace.
    $noIndentPrefixes = [
        '</', '< ', '<img', '<br', '<hr', '<input', '<link', '<meta', '<col ',
        '<frame', '<isindex', '<param', '<area', '<base', '<!', '<?xml',
    ];

    // Rebuild the markup element by element.
    $html = '';
    $tabs = 0;
    $elementCount = count($htmlArray);
    // Start at 1: the sentinel produces no output and has no predecessor.
    for ($x = 1; $x < $elementCount; $x++) {
        $current = $htmlArray[$x];
        $previous = $htmlArray[$x - 1];

        // Should the current element start on a new line?
        $newline = false;
        if (substr($previous, 0, 5) === '<?xml') {
            $newline = true;
        } elseif ($boxLikeElements !== null) {
            // Either the current element is a box element (any opening or
            // self-closing form matches the first pattern), or the previous
            // element was a closing box tag, a comment or a self-closed box tag.
            $newline = preg_match('/<' . $boxLikeElements . '(.*)>/Usi', $current)
                || preg_match('/<\/' . $boxLikeElements . '(.*)>/Usi', $previous)
                || substr($previous, 0, 4) === '<!--'
                || preg_match('/<' . $boxLikeElements . '(.*) \/>/Usi', $previous);
        }

        // A closing tag belongs one level further out.
        if (substr($current, 0, 2) === '</') {
            $tabs--;
        }

        // Line break plus indentation in front of the current element.
        if ($newline) {
            $html .= $this->newline . str_repeat($this->tab, max(0, $tabs));
        }

        // Append the element with the white space treatment its context needs.
        if (substr($previous, 0, 4) === '<pre'
            || substr($previous, 0, 9) === '<textarea'
            || substr($previous, 0, 4) === '<!--'
        ) {
            // Directly after pre / textarea / comment: only trim line ends.
            $html .= $this->rTrimLines($current);
        } elseif (substr($current, 0, 9) === '<![CDATA['
            || substr($current, 0, 5) === '<?xml'
        ) {
            // CDATA / XML prolog: collapse white space, keep line breaks.
            $html .= $this->killWhiteSpace($current);
        } else {
            $html .= $this->killLineBreaks($current);
        }

        // An opening tag that is not in the no-indent list deepens the nesting.
        if (substr($current, 0, 1) === '<') {
            $opensLevel = true;
            foreach ($noIndentPrefixes as $prefix) {
                if (strncmp($current, $prefix, strlen($prefix)) === 0) {
                    $opensLevel = false;
                    break;
                }
            }
            if ($opensLevel) {
                $tabs++;
            }
        }
    }

    // Remove empty lines
    if ($this->formatType > 1) {
        $this->removeEmptyLines($html);
    }

    // Put the parked blocks back, minus white space at their line ends.
    for ($i = 0; $i < $noFormatCount; $i++) {
        $noFormat[$i] = $this->rTrimLines($noFormat[$i]);
        $html = str_replace("<!-- ELEMENT $i -->", $noFormat[$i], $html);
    }

    // A non-zero depth at the end means opening and closing tags did not balance.
    if ($tabs != 0 && $this->debugComment === true) {
        $html .= '<!--' . $tabs . " open elements found-->\r\n";
    }
}
|
| 341 | + |
|
| 342 | + /** |
|
| 343 | + * Remove ALL line breaks and multiple white space |
|
| 344 | + * |
|
| 345 | + * @param string $html |
|
| 346 | + * |
|
| 347 | + * @return string |
|
| 348 | + */ |
|
| 349 | + protected function killLineBreaks($html) |
|
| 350 | + { |
|
| 351 | + $html = $this->convNlOs($html); |
|
| 352 | + $html = str_replace($this->newline, "", $html); |
|
| 353 | + // remove double empty spaces |
|
| 354 | + if ($this->utf8 == true) { |
|
| 355 | + $html = preg_replace('/\s\s+/u', ' ', $html); |
|
| 356 | + } else { |
|
| 357 | + $html = preg_replace('/\s\s+/', ' ', $html); |
|
| 358 | + } |
|
| 359 | + return $html; |
|
| 360 | + } |
|
| 361 | + |
|
| 362 | + /** |
|
| 363 | + * Remove multiple white space, keeps line breaks |
|
| 364 | + * |
|
| 365 | + * @param string $html |
|
| 366 | + * |
|
| 367 | + * @return string |
|
| 368 | + */ |
|
| 369 | + protected function killWhiteSpace($html) |
|
| 370 | + { |
|
| 371 | + $html = $this->convNlOs($html); |
|
| 372 | + $temp = explode($this->newline, $html); |
|
| 373 | + for ($i = 0; $i < count($temp); $i++) { |
|
| 374 | + if (!trim($temp[$i])) { |
|
| 375 | + unset($temp[$i]); |
|
| 376 | + } else { |
|
| 377 | + $temp[$i] = trim($temp[$i]); |
|
| 378 | + $temp[$i] = preg_replace('/\s\s+/', ' ', $temp[$i]); |
|
| 379 | + } |
|
| 380 | + } |
|
| 381 | + $html = implode($this->newline, $temp); |
|
| 382 | + return $html; |
|
| 383 | + } |
|
| 384 | + |
|
| 385 | + /** |
|
| 386 | + * Remove white space at the end of lines, keeps other white space and line breaks |
|
| 387 | + * |
|
| 388 | + * @param string $html |
|
| 389 | + * |
|
| 390 | + * @return string |
|
| 391 | + */ |
|
| 392 | + protected function rTrimLines($html) |
|
| 393 | + { |
|
| 394 | + $html = $this->convNlOs($html); |
|
| 395 | + $temp = explode($this->newline, $html); |
|
| 396 | + for ($i = 0; $i < count($temp); $i++) { |
|
| 397 | + $temp[$i] = rtrim($temp[$i]); |
|
| 398 | + } |
|
| 399 | + $html = implode($this->newline, $temp); |
|
| 400 | + return $html; |
|
| 401 | + } |
|
| 402 | + |
|
| 403 | + /** |
|
| 404 | + * Convert newlines according to the current OS |
|
| 405 | + * |
|
| 406 | + * @param string $html |
|
| 407 | + * |
|
| 408 | + * @return string |
|
| 409 | + */ |
|
| 410 | + protected function convNlOs($html) |
|
| 411 | + { |
|
| 412 | + $html = preg_replace("(\r\n|\n|\r)", $this->newline, $html); |
|
| 413 | + return $html; |
|
| 414 | + } |
|
| 415 | + |
|
| 416 | + /** |
|
| 417 | + * Remove tabs and empty spaces before and after lines, transforms linebreaks system conform |
|
| 418 | + * |
|
| 419 | + * @param string $html Html-Code |
|
| 420 | + * |
|
| 421 | + * @return void |
|
| 422 | + */ |
|
| 423 | + protected function trimLines(&$html) |
|
| 424 | + { |
|
| 425 | + $html = str_replace("\t", "", $html); |
|
| 426 | + // convert newlines according to the current OS |
|
| 427 | + if (TYPO3_OS == "WIN") { |
|
| 428 | + $html = str_replace("\n", "\r\n", $html); |
|
| 429 | + } else { |
|
| 430 | + $html = str_replace("\r\n", "\n", $html); |
|
| 431 | + } |
|
| 432 | + $temp = explode($this->newline, $html); |
|
| 433 | + $temp = array_map('trim', $temp); |
|
| 434 | + $html = implode($this->newline, $temp); |
|
| 435 | + unset($temp); |
|
| 436 | + } |
|
| 437 | + |
|
| 438 | + /** |
|
| 439 | + * Remove empty lines |
|
| 440 | + * |
|
| 441 | + * @param string $html |
|
| 442 | + * |
|
| 443 | + * @return void |
|
| 444 | + */ |
|
| 445 | + protected function removeEmptyLines(&$html) |
|
| 446 | + { |
|
| 447 | + $temp = explode($this->newline, $html); |
|
| 448 | + $result = []; |
|
| 449 | + for ($i = 0; $i < count($temp); ++$i) { |
|
| 450 | + if ("" == trim($temp[$i])) { |
|
| 451 | + continue; |
|
| 452 | + } |
|
| 453 | + $result[] = $temp[$i]; |
|
| 454 | + } |
|
| 455 | + $html = implode($this->newline, $result); |
|
| 456 | + } |
|
| 457 | + |
|
| 458 | + /** |
|
| 459 | + * Remove new lines where unnecessary |
|
| 460 | + * spares line breaks within: pre, textarea, ... |
|
| 461 | + * |
|
| 462 | + * @param string $html |
|
| 463 | + * |
|
| 464 | + * @return void |
|
| 465 | + */ |
|
| 466 | + protected function removeNewLines(&$html) |
|
| 467 | + { |
|
| 468 | + $splitArray = [ |
|
| 469 | + 'textarea', |
|
| 470 | + 'pre' |
|
| 471 | + ]; // eventuell auch: span, script, style |
|
| 472 | + $peaces = preg_split('#(<(' . implode('|', $splitArray) . ').*>.*</\2>)#Uis', $html, -1, PREG_SPLIT_DELIM_CAPTURE); |
|
| 473 | + $html = ""; |
|
| 474 | + for ($i = 0; $i < count($peaces); $i++) { |
|
| 475 | + if (($i + 1) % 3 == 0) { |
|
| 476 | + continue; |
|
| 477 | + } |
|
| 478 | + $html .= (($i - 1) % 3 != 0) ? $this->killLineBreaks($peaces[$i]) : $peaces[$i]; |
|
| 479 | + } |
|
| 480 | + } |
|
| 481 | + |
|
| 482 | + /** |
|
| 483 | + * Remove obsolete link schema |
|
| 484 | + * |
|
| 485 | + * @param string $html |
|
| 486 | + * |
|
| 487 | + * @return void |
|
| 488 | + */ |
|
| 489 | + protected function removeLinkSchema(&$html) |
|
| 490 | + { |
|
| 491 | + $html = preg_replace("/<link rel=\"?schema.dc\"?.+?>/is", "", $html); |
|
| 492 | + } |
|
| 493 | + |
|
| 494 | + /** |
|
| 495 | + * Remove empty alt tags |
|
| 496 | + * |
|
| 497 | + * @param string $html |
|
| 498 | + * |
|
| 499 | + * @return void |
|
| 500 | + */ |
|
| 501 | + protected function removeEmptyAltAtr(&$html) |
|
| 502 | + { |
|
| 503 | + $html = str_replace("alt=\"\"", "", $html); |
|
| 504 | + } |
|
| 505 | + |
|
| 506 | + /** |
|
| 507 | + * Remove broken links in <a> tags |
|
| 508 | + * |
|
| 509 | + * @param string $html |
|
| 510 | + * |
|
| 511 | + * @return void |
|
| 512 | + */ |
|
| 513 | + protected function removeRealUrlBrokenRootLink(&$html) |
|
| 514 | + { |
|
| 515 | + $html = str_replace('href=".html"', 'href=""', $html); |
|
| 516 | + } |
|
| 517 | + |
|
| 518 | + /** |
|
| 519 | + * Include configured header comment in HTML content block |
|
| 520 | + * |
|
| 521 | + * @param $html |
|
| 522 | + */ |
|
| 523 | + public function includeHeaderComment(&$html) |
|
| 524 | + { |
|
| 525 | + if (!empty($this->headerComment)) { |
|
| 526 | + $html = preg_replace_callback('/<meta http-equiv(.*)>/Usi', function ($matches) { |
|
| 527 | + return trim($matches[0] . $this->newline . $this->tab . $this->tab . '<!-- ' . $this->headerComment . '-->'); |
|
| 528 | + }, $html, 1); |
|
| 529 | + } |
|
| 530 | + } |
|
| 531 | 531 | } |
@@ -74,7 +74,7 @@ discard block |
||
| 74 | 74 | |
| 75 | 75 | if (!empty($config)) { |
| 76 | 76 | if ($config['formatHtml'] && is_numeric($config['formatHtml'])) { |
| 77 | - $this->formatType = (int)$config['formatHtml']; |
|
| 77 | + $this->formatType = (int) $config['formatHtml']; |
|
| 78 | 78 | } |
| 79 | 79 | |
| 80 | 80 | if ($config['formatHtml.']['tabSize'] && is_numeric($config['formatHtml.']['tabSize'])) { |
@@ -82,11 +82,11 @@ discard block |
||
| 82 | 82 | } |
| 83 | 83 | |
| 84 | 84 | if (isset($config['enable_utf'])) { |
| 85 | - $this->utf8 = (bool)$config['enable_utf-8_support']; |
|
| 85 | + $this->utf8 = (bool) $config['enable_utf-8_support']; |
|
| 86 | 86 | } |
| 87 | 87 | |
| 88 | 88 | if (isset($config['formatHtml.']['debugComment'])) { |
| 89 | - $this->debugComment = (bool)$config['formatHtml.']['debugComment']; |
|
| 89 | + $this->debugComment = (bool) $config['formatHtml.']['debugComment']; |
|
| 90 | 90 | } |
| 91 | 91 | |
| 92 | 92 | if (isset($config['headerComment'])) { |
@@ -106,7 +106,7 @@ discard block |
||
| 106 | 106 | public function clean(&$html, $config = []) |
| 107 | 107 | { |
| 108 | 108 | if (!empty($config)) { |
| 109 | - if ((bool)$config['enabled'] === false) { |
|
| 109 | + if ((bool) $config['enabled'] === false) { |
|
| 110 | 110 | return; |
| 111 | 111 | } |
| 112 | 112 | |
@@ -115,15 +115,15 @@ discard block |
||
| 115 | 115 | |
| 116 | 116 | $manipulations = []; |
| 117 | 117 | |
| 118 | - if (isset($config['removeGenerator']) && (bool)$config['removeGenerator']) { |
|
| 118 | + if (isset($config['removeGenerator']) && (bool) $config['removeGenerator']) { |
|
| 119 | 119 | $manipulations['removeGenerator'] = GeneralUtility::makeInstance('HTML\\Sourceopt\\Manipulation\\RemoveGenerator'); |
| 120 | 120 | } |
| 121 | 121 | |
| 122 | - if (isset($config['removeComments']) && (bool)$config['removeComments']) { |
|
| 122 | + if (isset($config['removeComments']) && (bool) $config['removeComments']) { |
|
| 123 | 123 | $manipulations['removeComments'] = GeneralUtility::makeInstance('HTML\\Sourceopt\\Manipulation\\RemoveComments'); |
| 124 | 124 | } |
| 125 | 125 | |
| 126 | - if (isset($config['removeBlurScript']) && (bool)$config['removeBlurScript']) { |
|
| 126 | + if (isset($config['removeBlurScript']) && (bool) $config['removeBlurScript']) { |
|
| 127 | 127 | $manipulations['removeBlurScript'] = GeneralUtility::makeInstance('HTML\\Sourceopt\\Manipulation\\RemoveBlurScript'); |
| 128 | 128 | } |
| 129 | 129 | |
@@ -133,7 +133,7 @@ discard block |
||
| 133 | 133 | |
| 134 | 134 | foreach ($manipulations as $key => $manipulation) { |
| 135 | 135 | /** @var ManipulationInterface $manipulation */ |
| 136 | - $configuration = isset($config[$key . '.']) && is_array($config[$key . '.']) ? $config[$key . '.'] : []; |
|
| 136 | + $configuration = isset($config[$key.'.']) && is_array($config[$key.'.']) ? $config[$key.'.'] : []; |
|
| 137 | 137 | $html = $manipulation->manipulate($html, $configuration); |
| 138 | 138 | } |
| 139 | 139 | |
@@ -177,7 +177,7 @@ discard block |
||
| 177 | 177 | $functionalBoxElements = 'dd|dt|frameset|li|tbody|td|tfoot|th|thead|tr|colgroup'; |
| 178 | 178 | $usableBoxElements = 'applet|button|del|iframe|ins|map|object|script'; |
| 179 | 179 | $imagineBoxElements = 'html|body|head|meta|title|link|script|base|!--'; |
| 180 | - $allBoxLikeElements = '(?>' . $trueBoxElements . '|' . $functionalBoxElements . '|' . $usableBoxElements . '|' . $imagineBoxElements . ')'; |
|
| 180 | + $allBoxLikeElements = '(?>'.$trueBoxElements.'|'.$functionalBoxElements.'|'.$usableBoxElements.'|'.$imagineBoxElements.')'; |
|
| 181 | 181 | $esteticBoxLikeElements = '(?>html|head|body|meta name|title|div|table|h1|h2|h3|h4|h5|h6|p|form|pre|center|!--)'; |
| 182 | 182 | $structureBoxLikeElements = '(?>html|head|body|div|!--)'; |
| 183 | 183 | |
@@ -186,7 +186,7 @@ discard block |
||
| 186 | 186 | '/(<(?:[^<>]+(?:"[^"]*"|\'[^\']*\')?)+>)/', |
| 187 | 187 | $html, |
| 188 | 188 | -1, |
| 189 | - PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY |
|
| 189 | + PREG_SPLIT_DELIM_CAPTURE|PREG_SPLIT_NO_EMPTY |
|
| 190 | 190 | ); |
| 191 | 191 | // remove empty lines |
| 192 | 192 | $htmlArray = ['']; |
@@ -213,52 +213,52 @@ discard block |
||
| 213 | 213 | } elseif ($this->formatType == 2 && ( // minimalistic line break |
| 214 | 214 | # this element has a line break before itself |
| 215 | 215 | preg_match( |
| 216 | - '/<' . $structureBoxLikeElements . '(.*)>/Usi', |
|
| 216 | + '/<'.$structureBoxLikeElements.'(.*)>/Usi', |
|
| 217 | 217 | $htmlArray[$x] |
| 218 | 218 | ) || preg_match( |
| 219 | - '/<' . $structureBoxLikeElements . '(.*) \/>/Usi', |
|
| 219 | + '/<'.$structureBoxLikeElements.'(.*) \/>/Usi', |
|
| 220 | 220 | $htmlArray[$x] |
| 221 | 221 | ) || # one element before is a element that has a line break after |
| 222 | 222 | preg_match( |
| 223 | - '/<\/' . $structureBoxLikeElements . '(.*)>/Usi', |
|
| 223 | + '/<\/'.$structureBoxLikeElements.'(.*)>/Usi', |
|
| 224 | 224 | $htmlArray[$x - 1] |
| 225 | 225 | ) || substr( |
| 226 | 226 | $htmlArray[$x - 1], |
| 227 | 227 | 0, |
| 228 | 228 | 4 |
| 229 | - ) == '<!--' || preg_match('/<' . $structureBoxLikeElements . '(.*) \/>/Usi', $htmlArray[$x - 1])) |
|
| 229 | + ) == '<!--' || preg_match('/<'.$structureBoxLikeElements.'(.*) \/>/Usi', $htmlArray[$x - 1])) |
|
| 230 | 230 | ) { |
| 231 | 231 | $newline = true; |
| 232 | 232 | } elseif ($this->formatType == 3 && ( // aestetic line break |
| 233 | 233 | # this element has a line break before itself |
| 234 | 234 | preg_match( |
| 235 | - '/<' . $esteticBoxLikeElements . '(.*)>/Usi', |
|
| 235 | + '/<'.$esteticBoxLikeElements.'(.*)>/Usi', |
|
| 236 | 236 | $htmlArray[$x] |
| 237 | 237 | ) || preg_match( |
| 238 | - '/<' . $esteticBoxLikeElements . '(.*) \/>/Usi', |
|
| 238 | + '/<'.$esteticBoxLikeElements.'(.*) \/>/Usi', |
|
| 239 | 239 | $htmlArray[$x] |
| 240 | 240 | ) || # one element before is a element that has a line break after |
| 241 | - preg_match('/<\/' . $esteticBoxLikeElements . '(.*)>/Usi', $htmlArray[$x - 1]) || substr( |
|
| 241 | + preg_match('/<\/'.$esteticBoxLikeElements.'(.*)>/Usi', $htmlArray[$x - 1]) || substr( |
|
| 242 | 242 | $htmlArray[$x - 1], |
| 243 | 243 | 0, |
| 244 | 244 | 4 |
| 245 | - ) == '<!--' || preg_match('/<' . $esteticBoxLikeElements . '(.*) \/>/Usi', $htmlArray[$x - 1])) |
|
| 245 | + ) == '<!--' || preg_match('/<'.$esteticBoxLikeElements.'(.*) \/>/Usi', $htmlArray[$x - 1])) |
|
| 246 | 246 | ) { |
| 247 | 247 | $newline = true; |
| 248 | 248 | } elseif ($this->formatType >= 4 && ( // logical line break |
| 249 | 249 | # this element has a line break before itself |
| 250 | 250 | preg_match( |
| 251 | - '/<' . $allBoxLikeElements . '(.*)>/Usi', |
|
| 251 | + '/<'.$allBoxLikeElements.'(.*)>/Usi', |
|
| 252 | 252 | $htmlArray[$x] |
| 253 | 253 | ) || preg_match( |
| 254 | - '/<' . $allBoxLikeElements . '(.*) \/>/Usi', |
|
| 254 | + '/<'.$allBoxLikeElements.'(.*) \/>/Usi', |
|
| 255 | 255 | $htmlArray[$x] |
| 256 | 256 | ) || # one element before is a element that has a line break after |
| 257 | - preg_match('/<\/' . $allBoxLikeElements . '(.*)>/Usi', $htmlArray[$x - 1]) || substr( |
|
| 257 | + preg_match('/<\/'.$allBoxLikeElements.'(.*)>/Usi', $htmlArray[$x - 1]) || substr( |
|
| 258 | 258 | $htmlArray[$x - 1], |
| 259 | 259 | 0, |
| 260 | 260 | 4 |
| 261 | - ) == '<!--' || preg_match('/<' . $allBoxLikeElements . '(.*) \/>/Usi', $htmlArray[$x - 1])) |
|
| 261 | + ) == '<!--' || preg_match('/<'.$allBoxLikeElements.'(.*) \/>/Usi', $htmlArray[$x - 1])) |
|
| 262 | 262 | ) { |
| 263 | 263 | $newline = true; |
| 264 | 264 | } |
@@ -335,7 +335,7 @@ discard block |
||
| 335 | 335 | |
| 336 | 336 | // include debug comment at the end |
| 337 | 337 | if ($tabs != 0 && $this->debugComment === true) { |
| 338 | - $html .= '<!--' . $tabs . " open elements found-->\r\n"; |
|
| 338 | + $html .= '<!--'.$tabs." open elements found-->\r\n"; |
|
| 339 | 339 | } |
| 340 | 340 | } |
| 341 | 341 | |
@@ -469,7 +469,7 @@ discard block |
||
| 469 | 469 | 'textarea', |
| 470 | 470 | 'pre' |
| 471 | 471 | ]; // eventuell auch: span, script, style |
| 472 | - $peaces = preg_split('#(<(' . implode('|', $splitArray) . ').*>.*</\2>)#Uis', $html, -1, PREG_SPLIT_DELIM_CAPTURE); |
|
| 472 | + $peaces = preg_split('#(<('.implode('|', $splitArray).').*>.*</\2>)#Uis', $html, -1, PREG_SPLIT_DELIM_CAPTURE); |
|
| 473 | 473 | $html = ""; |
| 474 | 474 | for ($i = 0; $i < count($peaces); $i++) { |
| 475 | 475 | if (($i + 1) % 3 == 0) { |
@@ -523,8 +523,8 @@ discard block |
||
| 523 | 523 | public function includeHeaderComment(&$html) |
| 524 | 524 | { |
| 525 | 525 | if (!empty($this->headerComment)) { |
| 526 | - $html = preg_replace_callback('/<meta http-equiv(.*)>/Usi', function ($matches) { |
|
| 527 | - return trim($matches[0] . $this->newline . $this->tab . $this->tab . '<!-- ' . $this->headerComment . '-->'); |
|
| 526 | + $html = preg_replace_callback('/<meta http-equiv(.*)>/Usi', function($matches) { |
|
| 527 | + return trim($matches[0].$this->newline.$this->tab.$this->tab.'<!-- '.$this->headerComment.'-->'); |
|
| 528 | 528 | }, $html, 1); |
| 529 | 529 | } |
| 530 | 530 | } |