| @@ -191,7 +191,7 @@ | ||
| 191 | 191 | '/(<(?:[^<>]+(?:"[^"]*"|\'[^\']*\')?)+>)/', | 
| 192 | 192 | $html, | 
| 193 | 193 | -1, | 
| 194 | - \PREG_SPLIT_DELIM_CAPTURE | \PREG_SPLIT_NO_EMPTY | |
| 194 | + \PREG_SPLIT_DELIM_CAPTURE|\PREG_SPLIT_NO_EMPTY | |
| 195 | 195 | ); | 
| 196 | 196 | |
| 197 | 197 |          if (false === $htmlArrayTemp) { | 
| @@ -17,415 +17,415 @@ | ||
| 17 | 17 | */ | 
| 18 | 18 | class CleanHtmlService implements SingletonInterface | 
| 19 | 19 |  { | 
| 20 | - /** | |
| 21 | - * Enable Debug comment in footer. | |
| 22 | - * | |
| 23 | - * @var bool | |
| 24 | - */ | |
| 25 | - protected $debugComment = false; | |
| 26 | - | |
| 27 | - /** | |
| 28 | - * Format Type. | |
| 29 | - * | |
| 30 | - * @var int | |
| 31 | - */ | |
| 32 | - protected $formatType = 0; | |
| 33 | - | |
| 34 | - /** | |
| 35 | - * Tab character. | |
| 36 | - * | |
| 37 | - * @var string | |
| 38 | - */ | |
| 39 | - protected $tab = "\t"; | |
| 40 | - | |
| 41 | - /** | |
| 42 | - * Newline character. | |
| 43 | - * | |
| 44 | - * @var string | |
| 45 | - */ | |
| 46 | - protected $newline = "\n"; | |
| 47 | - | |
| 48 | - /** | |
| 49 | - * Configured extra header comment. | |
| 50 | - * | |
| 51 | - * @var string | |
| 52 | - */ | |
| 53 | - protected $headerComment = ''; | |
| 54 | - | |
| 55 | - /** | |
| 56 | - * Empty space char. | |
| 57 | - * | |
| 58 | - * @var string | |
| 59 | - */ | |
| 60 | - protected $emptySpaceChar = ' '; | |
| 61 | - | |
| 62 | - /** | |
| 63 | - * Set variables based on given config. | |
| 64 | - */ | |
| 65 | - public function setVariables(array $config): void | |
| 66 | -    { | |
| 67 | -        if (!empty($config)) { | |
| 68 | -            if ($config['formatHtml'] && is_numeric($config['formatHtml'])) { | |
| 69 | - $this->formatType = (int) $config['formatHtml']; | |
| 70 | - } | |
| 71 | - | |
| 72 | -            if ($config['formatHtml.']['tabSize'] && is_numeric($config['formatHtml.']['tabSize'])) { | |
| 73 | -                $this->tab = str_pad('', (int) $config['formatHtml.']['tabSize'], ' '); | |
| 74 | - } | |
| 75 | - | |
| 76 | -            if (isset($config['formatHtml.']['debugComment'])) { | |
| 77 | - $this->debugComment = (bool) $config['formatHtml.']['debugComment']; | |
| 78 | - } | |
| 79 | - | |
| 80 | -            if (isset($config['headerComment'])) { | |
| 81 | - $this->headerComment = $config['headerComment']; | |
| 82 | - } | |
| 83 | - | |
| 84 | -            if (isset($config['dropEmptySpaceChar']) && (bool) $config['dropEmptySpaceChar']) { | |
| 85 | - $this->emptySpaceChar = ''; | |
| 86 | - } | |
| 87 | - } | |
| 88 | - } | |
| 89 | - | |
| 90 | - /** | |
| 91 | - * Clean given HTML with formatter. | |
| 92 | - * | |
| 93 | - * @param string $html | |
| 94 | - * @param array $config | |
| 95 | - * | |
| 96 | - * @return string | |
| 97 | - */ | |
| 98 | - public function clean($html, $config = []) | |
| 99 | -    { | |
| 100 | -        if (!empty($config)) { | |
| 101 | - $this->setVariables($config); | |
| 102 | - } | |
| 103 | - // convert line-breaks to UNIX | |
| 104 | - $this->convNlOs($html); | |
| 105 | - | |
| 106 | - $manipulations = []; | |
| 107 | - | |
| 108 | -        if (isset($config['removeGenerator']) && (bool) $config['removeGenerator']) { | |
| 109 | - $manipulations['removeGenerator'] = GeneralUtility::makeInstance(RemoveGenerator::class); | |
| 110 | - } | |
| 111 | - | |
| 112 | -        if (isset($config['removeComments']) && (bool) $config['removeComments']) { | |
| 113 | - $manipulations['removeComments'] = GeneralUtility::makeInstance(RemoveComments::class); | |
| 114 | - } | |
| 115 | - | |
| 116 | -        if (!empty($this->headerComment)) { | |
| 117 | - $this->includeHeaderComment($html); | |
| 118 | - } | |
| 119 | - | |
| 120 | -        foreach ($manipulations as $key => $manipulation) { | |
| 121 | - /** @var ManipulationInterface $manipulation */ | |
| 122 | - $configuration = isset($config[$key.'.']) && \is_array($config[$key.'.']) ? $config[$key.'.'] : []; | |
| 123 | - $html = $manipulation->manipulate($html, $configuration); | |
| 124 | - } | |
| 125 | - | |
| 126 | - // cleanup HTML5 self-closing elements | |
| 127 | - if (!isset($GLOBALS['TSFE']->config['config']['doctype']) | |
| 128 | -            || 'x' !== substr($GLOBALS['TSFE']->config['config']['doctype'], 0, 1)) { | |
| 129 | - $html = preg_replace( | |
| 130 | - '/<((?:area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)\s[^>]+?)\s?\/>/', | |
| 131 | - '<$1>', | |
| 132 | - $html | |
| 133 | - ); | |
| 134 | - } | |
| 135 | - | |
| 136 | -        if ($this->formatType > 0) { | |
| 137 | - $html = $this->formatHtml($html); | |
| 138 | - } | |
| 139 | - // remove white space after line ending | |
| 140 | - $this->rTrimLines($html); | |
| 141 | - | |
| 142 | - // recover line-breaks | |
| 143 | -        if (Environment::isWindows()) { | |
| 144 | - $html = str_replace($this->newline, "\r\n", $html); | |
| 145 | - } | |
| 146 | - | |
| 147 | - return $html; | |
| 148 | - } | |
| 149 | - | |
| 150 | - /** | |
| 151 | - * Formats the (X)HTML code: | |
| 152 | - * - taps according to the hirarchy of the tags | |
| 153 | - * - removes empty spaces between tags | |
| 154 | - * - removes linebreaks within tags (spares where necessary: pre, textarea, comments, ..) | |
| 155 | - * choose from five options: | |
| 156 | - * 0 => off | |
| 157 | - * 1 => no line break at all (code in one line) | |
| 158 | - * 2 => minimalistic line breaks (structure defining box-elements) | |
| 159 | - * 3 => aesthetic line breaks (important box-elements) | |
| 160 | - * 4 => logic line breaks (all box-elements) | |
| 161 | - * 5 => max line breaks (all elements). | |
| 162 | - * | |
| 163 | - * @param string $html | |
| 164 | - * | |
| 165 | - * @return string | |
| 166 | - */ | |
| 167 | - protected function formatHtml($html) | |
| 168 | -    { | |
| 169 | - // Save original formated comments, pre, textarea, styles and java-scripts & replace them with markers | |
| 170 | - preg_match_all( | |
| 171 | - '/(?s)((<!--.*?-->)|(<[ \n\r]*pre[^>]*>.*?<[ \n\r]*\/pre[^>]*>)|(<[ \n\r]*textarea[^>]*>.*?<[ \n\r]*\/textarea[^>]*>)|(<[ \n\r]*style[^>]*>.*?<[ \n\r]*\/style[^>]*>)|(<[ \n\r]*script[^>]*>.*?<[ \n\r]*\/script[^>]*>))/im', | |
| 172 | - $html, | |
| 173 | - $matches | |
| 174 | - ); | |
| 175 | - $noFormat = $matches[0]; // do not format these block elements | |
| 176 | -        for ($i = 0; $i < \count($noFormat); ++$i) { | |
| 177 | -            $html = str_replace($noFormat[$i], "\n<!-- ELEMENT {$i} -->", $html); | |
| 178 | - } | |
| 179 | - | |
| 180 | - // define box elements for formatting | |
| 181 | - $trueBoxElements = 'address|blockquote|center|dir|div|dl|fieldset|form|h1|h2|h3|h4|h5|h6|hr|isindex|menu|noframes|noscript|ol|p|pre|table|ul|article|aside|details|figcaption|figure|footer|header|hgroup|menu|nav|section'; | |
| 182 | - $functionalBoxElements = 'dd|dt|frameset|li|tbody|td|tfoot|th|thead|tr|colgroup'; | |
| 183 | - $usableBoxElements = 'applet|button|del|iframe|ins|map|object|script'; | |
| 184 | - $imagineBoxElements = 'html|body|head|meta|title|link|script|base|!--'; | |
| 185 | - $allBoxLikeElements = '(?>'.$trueBoxElements.'|'.$functionalBoxElements.'|'.$usableBoxElements.'|'.$imagineBoxElements.')'; | |
| 186 | - $esteticBoxLikeElements = '(?>html|head|body|meta name|title|div|table|h1|h2|h3|h4|h5|h6|p|form|pre|center|!--)'; | |
| 187 | - $structureBoxLikeElements = '(?>html|head|body|div|!--)'; | |
| 188 | - | |
| 189 | - // split html into it's elements | |
| 190 | - $htmlArrayTemp = preg_split( | |
| 191 | - '/(<(?:[^<>]+(?:"[^"]*"|\'[^\']*\')?)+>)/', | |
| 192 | - $html, | |
| 193 | - -1, | |
| 194 | - \PREG_SPLIT_DELIM_CAPTURE | \PREG_SPLIT_NO_EMPTY | |
| 195 | - ); | |
| 196 | - | |
| 197 | -        if (false === $htmlArrayTemp) { | |
| 198 | - // Restore saved comments, styles and java-scripts | |
| 199 | -            for ($i = 0; $i < \count($noFormat); ++$i) { | |
| 200 | -                $html = str_replace("<!-- ELEMENT {$i} -->", $noFormat[$i], $html); | |
| 201 | - } | |
| 202 | - | |
| 203 | - return $html; | |
| 204 | - } | |
| 205 | - // remove empty lines | |
| 206 | - $htmlArray = ['']; | |
| 207 | - $index = 1; | |
| 208 | -        for ($x = 0; $x < \count($htmlArrayTemp); ++$x) { | |
| 209 | - $text = trim($htmlArrayTemp[$x]); | |
| 210 | - $htmlArray[$index] = '' !== $text ? $htmlArrayTemp[$x] : $this->emptySpaceChar; | |
| 211 | - ++$index; | |
| 212 | - } | |
| 213 | - | |
| 214 | - // rebuild html | |
| 215 | - $html = ''; | |
| 216 | - $tabs = 0; | |
| 217 | -        for ($x = 0; $x < \count($htmlArray); ++$x) { | |
| 218 | - $htmlArrayBefore = $htmlArray[$x - 1] ?? ''; | |
| 219 | - $htmlArrayCurrent = $htmlArray[$x] ?? ''; | |
| 220 | - | |
| 221 | - // check if the element should stand in a new line | |
| 222 | - $newline = false; | |
| 223 | -            if ('<?xml' == substr($htmlArrayBefore, 0, 5)) { | |
| 224 | - $newline = true; | |
| 225 | - } elseif (2 == $this->formatType && ( // minimalistic line break | |
| 226 | - // this element has a line break before itself | |
| 227 | - preg_match( | |
| 228 | - '/<'.$structureBoxLikeElements.'(.*)>/Usi', | |
| 229 | - $htmlArrayCurrent | |
| 230 | - ) || preg_match( | |
| 231 | - '/<'.$structureBoxLikeElements.'(.*) \/>/Usi', | |
| 232 | - $htmlArrayCurrent | |
| 233 | - ) // one element before is a element that has a line break after | |
| 234 | - || preg_match( | |
| 235 | - '/<\/'.$structureBoxLikeElements.'(.*)>/Usi', | |
| 236 | - $htmlArrayBefore | |
| 237 | - ) || '<!--' == substr( | |
| 238 | - $htmlArrayBefore, | |
| 239 | - 0, | |
| 240 | - 4 | |
| 241 | -                    ) || preg_match('/<'.$structureBoxLikeElements.'(.*) \/>/Usi', $htmlArrayBefore)) | |
| 242 | -            ) { | |
| 243 | - $newline = true; | |
| 244 | - } elseif (3 == $this->formatType && ( // aestetic line break | |
| 245 | - // this element has a line break before itself | |
| 246 | - preg_match( | |
| 247 | - '/<'.$esteticBoxLikeElements.'(.*)>/Usi', | |
| 248 | - $htmlArrayCurrent | |
| 249 | - ) || preg_match( | |
| 250 | - '/<'.$esteticBoxLikeElements.'(.*) \/>/Usi', | |
| 251 | - $htmlArrayCurrent | |
| 252 | - ) // one element before is a element that has a line break after | |
| 253 | -                    || preg_match('/<\/'.$esteticBoxLikeElements.'(.*)>/Usi', $htmlArrayBefore) || '<!--' == substr( | |
| 254 | - $htmlArrayBefore, | |
| 255 | - 0, | |
| 256 | - 4 | |
| 257 | -                    ) || preg_match('/<'.$esteticBoxLikeElements.'(.*) \/>/Usi', $htmlArrayBefore)) | |
| 258 | -            ) { | |
| 259 | - $newline = true; | |
| 260 | - } elseif ($this->formatType >= 4 && ( // logical line break | |
| 261 | - // this element has a line break before itself | |
| 262 | - preg_match( | |
| 263 | - '/<'.$allBoxLikeElements.'(.*)>/Usi', | |
| 264 | - $htmlArrayCurrent | |
| 265 | - ) || preg_match( | |
| 266 | - '/<'.$allBoxLikeElements.'(.*) \/>/Usi', | |
| 267 | - $htmlArrayCurrent | |
| 268 | - ) // one element before is a element that has a line break after | |
| 269 | -                    || preg_match('/<\/'.$allBoxLikeElements.'(.*)>/Usi', $htmlArrayBefore) || '<!--' == substr( | |
| 270 | - $htmlArrayBefore, | |
| 271 | - 0, | |
| 272 | - 4 | |
| 273 | -                    ) || preg_match('/<'.$allBoxLikeElements.'(.*) \/>/Usi', $htmlArrayBefore)) | |
| 274 | -            ) { | |
| 275 | - $newline = true; | |
| 276 | - } | |
| 277 | - | |
| 278 | - // count down a tab | |
| 279 | -            if ('</' == substr($htmlArrayCurrent, 0, 2)) { | |
| 280 | - --$tabs; | |
| 281 | - } | |
| 282 | - | |
| 283 | - // add tabs and line breaks in front of the current tag | |
| 284 | -            if ($newline) { | |
| 285 | - $html .= $this->newline; | |
| 286 | -                for ($y = 0; $y < $tabs; ++$y) { | |
| 287 | - $html .= $this->tab; | |
| 288 | - } | |
| 289 | - } | |
| 290 | - | |
| 291 | - // remove white spaces and line breaks and add current tag to the html-string | |
| 292 | -            if ('<![CDATA[' == substr($htmlArrayCurrent, 0, 9) // remove multiple white space in CDATA / XML | |
| 293 | - || '<?xml' == substr($htmlArrayCurrent, 0, 5) | |
| 294 | -            ) { | |
| 295 | - $html .= $this->killWhiteSpace($htmlArrayCurrent); | |
| 296 | -            } else { // remove all line breaks | |
| 297 | - $html .= $this->killLineBreaks($htmlArrayCurrent); | |
| 298 | - } | |
| 299 | - | |
| 300 | - // count up a tab | |
| 301 | -            if ('<' == substr($htmlArrayCurrent, 0, 1) && '/' != substr($htmlArrayCurrent, 1, 1)) { | |
| 302 | -                if (' ' !== substr($htmlArrayCurrent, 1, 1) | |
| 303 | - && 'img' !== substr($htmlArrayCurrent, 1, 3) | |
| 304 | - && 'source' !== substr($htmlArrayCurrent, 1, 6) | |
| 305 | - && 'br' !== substr($htmlArrayCurrent, 1, 2) | |
| 306 | - && 'hr' !== substr($htmlArrayCurrent, 1, 2) | |
| 307 | - && 'input' !== substr($htmlArrayCurrent, 1, 5) | |
| 308 | - && 'link' !== substr($htmlArrayCurrent, 1, 4) | |
| 309 | - && 'meta' !== substr($htmlArrayCurrent, 1, 4) | |
| 310 | - && 'col ' !== substr($htmlArrayCurrent, 1, 4) | |
| 311 | - && 'frame' !== substr($htmlArrayCurrent, 1, 5) | |
| 312 | - && 'isindex' !== substr($htmlArrayCurrent, 1, 7) | |
| 313 | - && 'param' !== substr($htmlArrayCurrent, 1, 5) | |
| 314 | - && 'area' !== substr($htmlArrayCurrent, 1, 4) | |
| 315 | - && 'base' !== substr($htmlArrayCurrent, 1, 4) | |
| 316 | - && '<!' !== substr($htmlArrayCurrent, 0, 2) | |
| 317 | - && '<?xml' !== substr($htmlArrayCurrent, 0, 5) | |
| 318 | -                ) { | |
| 319 | - ++$tabs; | |
| 320 | - } | |
| 321 | - } | |
| 322 | - } | |
| 323 | - | |
| 324 | - // Remove empty lines | |
| 325 | -        if ($this->formatType > 1) { | |
| 326 | - $this->removeEmptyLines($html); | |
| 327 | - } | |
| 328 | - | |
| 329 | - // Restore saved comments, styles and java-scripts | |
| 330 | -        for ($i = 0; $i < \count($noFormat); ++$i) { | |
| 331 | -            $html = str_replace("<!-- ELEMENT {$i} -->", $noFormat[$i], $html); | |
| 332 | - } | |
| 333 | - | |
| 334 | - // include debug comment at the end | |
| 335 | -        if (0 != $tabs && true === $this->debugComment) { | |
| 336 | -            $html .= "<!-- {$tabs} open elements found -->"; | |
| 337 | - } | |
| 338 | - | |
| 339 | - return $html; | |
| 340 | - } | |
| 341 | - | |
| 342 | - /** | |
| 343 | - * Remove ALL line breaks and multiple white space. | |
| 344 | - * | |
| 345 | - * @param string $html | |
| 346 | - * | |
| 347 | - * @return string | |
| 348 | - */ | |
| 349 | - protected function killLineBreaks($html) | |
| 350 | -    { | |
| 351 | - $html = str_replace($this->newline, '', $html); | |
| 352 | - | |
| 353 | -        return preg_replace('/\s\s+/u', ' ', $html); | |
| 354 | -        //? return preg_replace('/\n|\s+(\s)/u', '$1', $html); | |
| 355 | - } | |
| 356 | - | |
| 357 | - /** | |
| 358 | - * Remove multiple white space, keeps line breaks. | |
| 359 | - * | |
| 360 | - * @param string $html | |
| 361 | - * | |
| 362 | - * @return string | |
| 363 | - */ | |
| 364 | - protected function killWhiteSpace($html) | |
| 365 | -    { | |
| 366 | - $temp = explode($this->newline, $html); | |
| 367 | -        for ($i = 0; $i < \count($temp); ++$i) { | |
| 368 | -            if (!trim($temp[$i])) { | |
| 369 | - unset($temp[$i]); | |
| 370 | - continue; | |
| 371 | - } | |
| 372 | - | |
| 373 | - $temp[$i] = trim($temp[$i]); | |
| 374 | -            $temp[$i] = preg_replace('/\s\s+/', ' ', $temp[$i]); | |
| 375 | - } | |
| 376 | - | |
| 377 | - return implode($this->newline, $temp); | |
| 378 | - } | |
| 379 | - | |
| 380 | - /** | |
| 381 | - * Remove white space at the end of lines, keeps other white space and line breaks. | |
| 382 | - * | |
| 383 | - * @param string $html | |
| 384 | - * | |
| 385 | - * @return string | |
| 386 | - */ | |
| 387 | - protected function rTrimLines(& $html) | |
| 388 | -    { | |
| 389 | -        $html = preg_replace('/\s+$/m', '', $html); | |
| 390 | - } | |
| 391 | - | |
| 392 | - /** | |
| 393 | - * Convert newlines according to the current OS. | |
| 394 | - * | |
| 395 | - * @param string $html | |
| 396 | - * | |
| 397 | - * @return string | |
| 398 | - */ | |
| 399 | - protected function convNlOs(& $html) | |
| 400 | -    { | |
| 401 | -        $html = preg_replace("(\r\n|\r)", $this->newline, $html); | |
| 402 | - } | |
| 403 | - | |
| 404 | - /** | |
| 405 | - * Remove empty lines. | |
| 406 | - * | |
| 407 | - * @param string $html | |
| 408 | - */ | |
| 409 | - protected function removeEmptyLines(& $html): void | |
| 410 | -    { | |
| 411 | - $temp = explode($this->newline, $html); | |
| 412 | - $result = []; | |
| 413 | -        for ($i = 0; $i < \count($temp); ++$i) { | |
| 414 | -            if ('' == trim($temp[$i])) { | |
| 415 | - continue; | |
| 416 | - } | |
| 417 | - $result[] = $temp[$i]; | |
| 418 | - } | |
| 419 | - $html = implode($this->newline, $result); | |
| 420 | - } | |
| 421 | - | |
| 422 | - /** | |
| 423 | - * Include configured header comment in HTML content block. | |
| 424 | - * | |
| 425 | - * @param $html | |
| 426 | - */ | |
| 427 | - public function includeHeaderComment(& $html): void | |
| 428 | -    { | |
| 429 | -        $html = preg_replace('/^(-->)$/m', "\n\t".$this->headerComment."\n$1", $html); | |
| 430 | - } | |
| 20 | + /** | |
| 21 | + * Enable Debug comment in footer. | |
| 22 | + * | |
| 23 | + * @var bool | |
| 24 | + */ | |
| 25 | + protected $debugComment = false; | |
| 26 | + | |
| 27 | + /** | |
| 28 | + * Format Type. | |
| 29 | + * | |
| 30 | + * @var int | |
| 31 | + */ | |
| 32 | + protected $formatType = 0; | |
| 33 | + | |
| 34 | + /** | |
| 35 | + * Tab character. | |
| 36 | + * | |
| 37 | + * @var string | |
| 38 | + */ | |
| 39 | + protected $tab = "\t"; | |
| 40 | + | |
| 41 | + /** | |
| 42 | + * Newline character. | |
| 43 | + * | |
| 44 | + * @var string | |
| 45 | + */ | |
| 46 | + protected $newline = "\n"; | |
| 47 | + | |
| 48 | + /** | |
| 49 | + * Configured extra header comment. | |
| 50 | + * | |
| 51 | + * @var string | |
| 52 | + */ | |
| 53 | + protected $headerComment = ''; | |
| 54 | + | |
| 55 | + /** | |
| 56 | + * Empty space char. | |
| 57 | + * | |
| 58 | + * @var string | |
| 59 | + */ | |
| 60 | + protected $emptySpaceChar = ' '; | |
| 61 | + | |
| 62 | + /** | |
| 63 | + * Set variables based on given config. | |
| 64 | + */ | |
| 65 | + public function setVariables(array $config): void | |
| 66 | +	{ | |
| 67 | +		if (!empty($config)) { | |
| 68 | +			if ($config['formatHtml'] && is_numeric($config['formatHtml'])) { | |
| 69 | + $this->formatType = (int) $config['formatHtml']; | |
| 70 | + } | |
| 71 | + | |
| 72 | +			if ($config['formatHtml.']['tabSize'] && is_numeric($config['formatHtml.']['tabSize'])) { | |
| 73 | +				$this->tab = str_pad('', (int) $config['formatHtml.']['tabSize'], ' '); | |
| 74 | + } | |
| 75 | + | |
| 76 | +			if (isset($config['formatHtml.']['debugComment'])) { | |
| 77 | + $this->debugComment = (bool) $config['formatHtml.']['debugComment']; | |
| 78 | + } | |
| 79 | + | |
| 80 | +			if (isset($config['headerComment'])) { | |
| 81 | + $this->headerComment = $config['headerComment']; | |
| 82 | + } | |
| 83 | + | |
| 84 | +			if (isset($config['dropEmptySpaceChar']) && (bool) $config['dropEmptySpaceChar']) { | |
| 85 | + $this->emptySpaceChar = ''; | |
| 86 | + } | |
| 87 | + } | |
| 88 | + } | |
| 89 | + | |
| 90 | + /** | |
| 91 | + * Clean given HTML with formatter. | |
| 92 | + * | |
| 93 | + * @param string $html | |
| 94 | + * @param array $config | |
| 95 | + * | |
| 96 | + * @return string | |
| 97 | + */ | |
| 98 | + public function clean($html, $config = []) | |
| 99 | +	{ | |
| 100 | +		if (!empty($config)) { | |
| 101 | + $this->setVariables($config); | |
| 102 | + } | |
| 103 | + // convert line-breaks to UNIX | |
| 104 | + $this->convNlOs($html); | |
| 105 | + | |
| 106 | + $manipulations = []; | |
| 107 | + | |
| 108 | +		if (isset($config['removeGenerator']) && (bool) $config['removeGenerator']) { | |
| 109 | + $manipulations['removeGenerator'] = GeneralUtility::makeInstance(RemoveGenerator::class); | |
| 110 | + } | |
| 111 | + | |
| 112 | +		if (isset($config['removeComments']) && (bool) $config['removeComments']) { | |
| 113 | + $manipulations['removeComments'] = GeneralUtility::makeInstance(RemoveComments::class); | |
| 114 | + } | |
| 115 | + | |
| 116 | +		if (!empty($this->headerComment)) { | |
| 117 | + $this->includeHeaderComment($html); | |
| 118 | + } | |
| 119 | + | |
| 120 | +		foreach ($manipulations as $key => $manipulation) { | |
| 121 | + /** @var ManipulationInterface $manipulation */ | |
| 122 | + $configuration = isset($config[$key.'.']) && \is_array($config[$key.'.']) ? $config[$key.'.'] : []; | |
| 123 | + $html = $manipulation->manipulate($html, $configuration); | |
| 124 | + } | |
| 125 | + | |
| 126 | + // cleanup HTML5 self-closing elements | |
| 127 | + if (!isset($GLOBALS['TSFE']->config['config']['doctype']) | |
| 128 | +			|| 'x' !== substr($GLOBALS['TSFE']->config['config']['doctype'], 0, 1)) { | |
| 129 | + $html = preg_replace( | |
| 130 | + '/<((?:area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)\s[^>]+?)\s?\/>/', | |
| 131 | + '<$1>', | |
| 132 | + $html | |
| 133 | + ); | |
| 134 | + } | |
| 135 | + | |
| 136 | +		if ($this->formatType > 0) { | |
| 137 | + $html = $this->formatHtml($html); | |
| 138 | + } | |
| 139 | + // remove white space after line ending | |
| 140 | + $this->rTrimLines($html); | |
| 141 | + | |
| 142 | + // recover line-breaks | |
| 143 | +		if (Environment::isWindows()) { | |
| 144 | + $html = str_replace($this->newline, "\r\n", $html); | |
| 145 | + } | |
| 146 | + | |
| 147 | + return $html; | |
| 148 | + } | |
| 149 | + | |
| 150 | + /** | |
| 151 | + * Formats the (X)HTML code: | |
| 152 | + * - taps according to the hirarchy of the tags | |
| 153 | + * - removes empty spaces between tags | |
| 154 | + * - removes linebreaks within tags (spares where necessary: pre, textarea, comments, ..) | |
| 155 | + * choose from five options: | |
| 156 | + * 0 => off | |
| 157 | + * 1 => no line break at all (code in one line) | |
| 158 | + * 2 => minimalistic line breaks (structure defining box-elements) | |
| 159 | + * 3 => aesthetic line breaks (important box-elements) | |
| 160 | + * 4 => logic line breaks (all box-elements) | |
| 161 | + * 5 => max line breaks (all elements). | |
| 162 | + * | |
| 163 | + * @param string $html | |
| 164 | + * | |
| 165 | + * @return string | |
| 166 | + */ | |
| 167 | + protected function formatHtml($html) | |
| 168 | +	{ | |
| 169 | + // Save original formated comments, pre, textarea, styles and java-scripts & replace them with markers | |
| 170 | + preg_match_all( | |
| 171 | + '/(?s)((<!--.*?-->)|(<[ \n\r]*pre[^>]*>.*?<[ \n\r]*\/pre[^>]*>)|(<[ \n\r]*textarea[^>]*>.*?<[ \n\r]*\/textarea[^>]*>)|(<[ \n\r]*style[^>]*>.*?<[ \n\r]*\/style[^>]*>)|(<[ \n\r]*script[^>]*>.*?<[ \n\r]*\/script[^>]*>))/im', | |
| 172 | + $html, | |
| 173 | + $matches | |
| 174 | + ); | |
| 175 | + $noFormat = $matches[0]; // do not format these block elements | |
| 176 | +		for ($i = 0; $i < \count($noFormat); ++$i) { | |
| 177 | +			$html = str_replace($noFormat[$i], "\n<!-- ELEMENT {$i} -->", $html); | |
| 178 | + } | |
| 179 | + | |
| 180 | + // define box elements for formatting | |
| 181 | + $trueBoxElements = 'address|blockquote|center|dir|div|dl|fieldset|form|h1|h2|h3|h4|h5|h6|hr|isindex|menu|noframes|noscript|ol|p|pre|table|ul|article|aside|details|figcaption|figure|footer|header|hgroup|menu|nav|section'; | |
| 182 | + $functionalBoxElements = 'dd|dt|frameset|li|tbody|td|tfoot|th|thead|tr|colgroup'; | |
| 183 | + $usableBoxElements = 'applet|button|del|iframe|ins|map|object|script'; | |
| 184 | + $imagineBoxElements = 'html|body|head|meta|title|link|script|base|!--'; | |
| 185 | + $allBoxLikeElements = '(?>'.$trueBoxElements.'|'.$functionalBoxElements.'|'.$usableBoxElements.'|'.$imagineBoxElements.')'; | |
| 186 | + $esteticBoxLikeElements = '(?>html|head|body|meta name|title|div|table|h1|h2|h3|h4|h5|h6|p|form|pre|center|!--)'; | |
| 187 | + $structureBoxLikeElements = '(?>html|head|body|div|!--)'; | |
| 188 | + | |
| 189 | + // split html into it's elements | |
| 190 | + $htmlArrayTemp = preg_split( | |
| 191 | + '/(<(?:[^<>]+(?:"[^"]*"|\'[^\']*\')?)+>)/', | |
| 192 | + $html, | |
| 193 | + -1, | |
| 194 | + \PREG_SPLIT_DELIM_CAPTURE | \PREG_SPLIT_NO_EMPTY | |
| 195 | + ); | |
| 196 | + | |
| 197 | +		if (false === $htmlArrayTemp) { | |
| 198 | + // Restore saved comments, styles and java-scripts | |
| 199 | +			for ($i = 0; $i < \count($noFormat); ++$i) { | |
| 200 | +				$html = str_replace("<!-- ELEMENT {$i} -->", $noFormat[$i], $html); | |
| 201 | + } | |
| 202 | + | |
| 203 | + return $html; | |
| 204 | + } | |
| 205 | + // remove empty lines | |
| 206 | + $htmlArray = ['']; | |
| 207 | + $index = 1; | |
| 208 | +		for ($x = 0; $x < \count($htmlArrayTemp); ++$x) { | |
| 209 | + $text = trim($htmlArrayTemp[$x]); | |
| 210 | + $htmlArray[$index] = '' !== $text ? $htmlArrayTemp[$x] : $this->emptySpaceChar; | |
| 211 | + ++$index; | |
| 212 | + } | |
| 213 | + | |
| 214 | + // rebuild html | |
| 215 | + $html = ''; | |
| 216 | + $tabs = 0; | |
| 217 | +		for ($x = 0; $x < \count($htmlArray); ++$x) { | |
| 218 | + $htmlArrayBefore = $htmlArray[$x - 1] ?? ''; | |
| 219 | + $htmlArrayCurrent = $htmlArray[$x] ?? ''; | |
| 220 | + | |
| 221 | + // check if the element should stand in a new line | |
| 222 | + $newline = false; | |
| 223 | +			if ('<?xml' == substr($htmlArrayBefore, 0, 5)) { | |
| 224 | + $newline = true; | |
| 225 | + } elseif (2 == $this->formatType && ( // minimalistic line break | |
| 226 | + // this element has a line break before itself | |
| 227 | + preg_match( | |
| 228 | + '/<'.$structureBoxLikeElements.'(.*)>/Usi', | |
| 229 | + $htmlArrayCurrent | |
| 230 | + ) || preg_match( | |
| 231 | + '/<'.$structureBoxLikeElements.'(.*) \/>/Usi', | |
| 232 | + $htmlArrayCurrent | |
| 233 | + ) // one element before is a element that has a line break after | |
| 234 | + || preg_match( | |
| 235 | + '/<\/'.$structureBoxLikeElements.'(.*)>/Usi', | |
| 236 | + $htmlArrayBefore | |
| 237 | + ) || '<!--' == substr( | |
| 238 | + $htmlArrayBefore, | |
| 239 | + 0, | |
| 240 | + 4 | |
| 241 | +					) || preg_match('/<'.$structureBoxLikeElements.'(.*) \/>/Usi', $htmlArrayBefore)) | |
| 242 | +			) { | |
| 243 | + $newline = true; | |
| 244 | + } elseif (3 == $this->formatType && ( // aestetic line break | |
| 245 | + // this element has a line break before itself | |
| 246 | + preg_match( | |
| 247 | + '/<'.$esteticBoxLikeElements.'(.*)>/Usi', | |
| 248 | + $htmlArrayCurrent | |
| 249 | + ) || preg_match( | |
| 250 | + '/<'.$esteticBoxLikeElements.'(.*) \/>/Usi', | |
| 251 | + $htmlArrayCurrent | |
| 252 | + ) // one element before is a element that has a line break after | |
| 253 | +					|| preg_match('/<\/'.$esteticBoxLikeElements.'(.*)>/Usi', $htmlArrayBefore) || '<!--' == substr( | |
| 254 | + $htmlArrayBefore, | |
| 255 | + 0, | |
| 256 | + 4 | |
| 257 | +					) || preg_match('/<'.$esteticBoxLikeElements.'(.*) \/>/Usi', $htmlArrayBefore)) | |
| 258 | +			) { | |
| 259 | + $newline = true; | |
| 260 | + } elseif ($this->formatType >= 4 && ( // logical line break | |
| 261 | + // this element has a line break before itself | |
| 262 | + preg_match( | |
| 263 | + '/<'.$allBoxLikeElements.'(.*)>/Usi', | |
| 264 | + $htmlArrayCurrent | |
| 265 | + ) || preg_match( | |
| 266 | + '/<'.$allBoxLikeElements.'(.*) \/>/Usi', | |
| 267 | + $htmlArrayCurrent | |
| 268 | + ) // one element before is a element that has a line break after | |
| 269 | +					|| preg_match('/<\/'.$allBoxLikeElements.'(.*)>/Usi', $htmlArrayBefore) || '<!--' == substr( | |
| 270 | + $htmlArrayBefore, | |
| 271 | + 0, | |
| 272 | + 4 | |
| 273 | +					) || preg_match('/<'.$allBoxLikeElements.'(.*) \/>/Usi', $htmlArrayBefore)) | |
| 274 | +			) { | |
| 275 | + $newline = true; | |
| 276 | + } | |
| 277 | + | |
| 278 | + // count down a tab | |
| 279 | +			if ('</' == substr($htmlArrayCurrent, 0, 2)) { | |
| 280 | + --$tabs; | |
| 281 | + } | |
| 282 | + | |
| 283 | + // add tabs and line breaks in front of the current tag | |
| 284 | +			if ($newline) { | |
| 285 | + $html .= $this->newline; | |
| 286 | +				for ($y = 0; $y < $tabs; ++$y) { | |
| 287 | + $html .= $this->tab; | |
| 288 | + } | |
| 289 | + } | |
| 290 | + | |
| 291 | + // remove white spaces and line breaks and add current tag to the html-string | |
| 292 | +			if ('<![CDATA[' == substr($htmlArrayCurrent, 0, 9) // remove multiple white space in CDATA / XML | |
| 293 | + || '<?xml' == substr($htmlArrayCurrent, 0, 5) | |
| 294 | +			) { | |
| 295 | + $html .= $this->killWhiteSpace($htmlArrayCurrent); | |
| 296 | +			} else { // remove all line breaks | |
| 297 | + $html .= $this->killLineBreaks($htmlArrayCurrent); | |
| 298 | + } | |
| 299 | + | |
| 300 | + // count up a tab | |
| 301 | +			if ('<' == substr($htmlArrayCurrent, 0, 1) && '/' != substr($htmlArrayCurrent, 1, 1)) { | |
| 302 | +				if (' ' !== substr($htmlArrayCurrent, 1, 1) | |
| 303 | + && 'img' !== substr($htmlArrayCurrent, 1, 3) | |
| 304 | + && 'source' !== substr($htmlArrayCurrent, 1, 6) | |
| 305 | + && 'br' !== substr($htmlArrayCurrent, 1, 2) | |
| 306 | + && 'hr' !== substr($htmlArrayCurrent, 1, 2) | |
| 307 | + && 'input' !== substr($htmlArrayCurrent, 1, 5) | |
| 308 | + && 'link' !== substr($htmlArrayCurrent, 1, 4) | |
| 309 | + && 'meta' !== substr($htmlArrayCurrent, 1, 4) | |
| 310 | + && 'col ' !== substr($htmlArrayCurrent, 1, 4) | |
| 311 | + && 'frame' !== substr($htmlArrayCurrent, 1, 5) | |
| 312 | + && 'isindex' !== substr($htmlArrayCurrent, 1, 7) | |
| 313 | + && 'param' !== substr($htmlArrayCurrent, 1, 5) | |
| 314 | + && 'area' !== substr($htmlArrayCurrent, 1, 4) | |
| 315 | + && 'base' !== substr($htmlArrayCurrent, 1, 4) | |
| 316 | + && '<!' !== substr($htmlArrayCurrent, 0, 2) | |
| 317 | + && '<?xml' !== substr($htmlArrayCurrent, 0, 5) | |
| 318 | +				) { | |
| 319 | + ++$tabs; | |
| 320 | + } | |
| 321 | + } | |
| 322 | + } | |
| 323 | + | |
| 324 | + // Remove empty lines | |
| 325 | +		if ($this->formatType > 1) { | |
| 326 | + $this->removeEmptyLines($html); | |
| 327 | + } | |
| 328 | + | |
| 329 | + // Restore saved comments, styles and java-scripts | |
| 330 | +		for ($i = 0; $i < \count($noFormat); ++$i) { | |
| 331 | +			$html = str_replace("<!-- ELEMENT {$i} -->", $noFormat[$i], $html); | |
| 332 | + } | |
| 333 | + | |
| 334 | + // include debug comment at the end | |
| 335 | +		if (0 != $tabs && true === $this->debugComment) { | |
| 336 | +			$html .= "<!-- {$tabs} open elements found -->"; | |
| 337 | + } | |
| 338 | + | |
| 339 | + return $html; | |
| 340 | + } | |
| 341 | + | |
| 342 | + /** | |
| 343 | + * Remove ALL line breaks and multiple white space. | |
| 344 | + * | |
| 345 | + * @param string $html | |
| 346 | + * | |
| 347 | + * @return string | |
| 348 | + */ | |
| 349 | + protected function killLineBreaks($html) | |
| 350 | +	{ | |
| 351 | + $html = str_replace($this->newline, '', $html); | |
| 352 | + | |
| 353 | +		return preg_replace('/\s\s+/u', ' ', $html); | |
| 354 | +		//? return preg_replace('/\n|\s+(\s)/u', '$1', $html); | |
| 355 | + } | |
| 356 | + | |
/**
 * Remove multiple white space, keeps line breaks.
 *
 * The input is split on the configured newline string. Each line is trimmed,
 * lines that are empty after trimming are dropped, and runs of two or more
 * white space characters inside a line are collapsed to a single space.
 *
 * @param string $html
 *
 * @return string
 */
protected function killWhiteSpace($html)
{
	$lines = [];
	// Iterate with foreach and collect into a new list: the former
	// "for ($i < count($temp))" loop shrank its own bound with every unset(),
	// so lines at the end of the document were never processed.
	foreach (explode($this->newline, $html) as $line) {
		$line = trim($line);
		// Strict comparison so that a line consisting only of "0" is kept.
		if ('' === $line) {
			continue;
		}

		$lines[] = preg_replace('/\s\s+/', ' ', $line);
	}

	return implode($this->newline, $lines);
}
| 379 | + | |
/**
 * Strip white space that precedes a line end.
 *
 * Works in place on the referenced string; nothing is returned.
 *
 * NOTE(review): "\s" also matches line breaks, so a white space run that
 * spans blank lines is removed up to the last line end of that run, which
 * removes those blank lines as well.
 *
 * @param string $html passed by reference and overwritten with the result
 */
protected function rTrimLines(&$html)
{
	$trailingWhiteSpace = '/\s+$/m';

	$html = preg_replace($trailingWhiteSpace, '', $html);
}
| 391 | + | |
/**
 * Normalise line breaks to the configured newline string.
 *
 * Every "\r\n" pair and every remaining lone "\r" is replaced by
 * $this->newline. Works in place on the referenced string; nothing is
 * returned.
 *
 * @param string $html passed by reference and overwritten with the result
 */
protected function convNlOs(&$html)
{
	// The outer parentheses act as the PCRE delimiters of this pattern.
	$foreignLineBreaks = "(\r\n|\r)";

	$html = preg_replace($foreignLineBreaks, $this->newline, $html);
}
| 403 | + | |
/**
 * Drop every line that is empty or contains only white space.
 *
 * The string is split on the configured newline string, blank lines are
 * filtered out and the remaining lines are joined again. Works in place on
 * the referenced string.
 *
 * @param string $html passed by reference and overwritten with the result
 */
protected function removeEmptyLines(&$html): void
{
	$nonBlankLines = array_filter(
		explode($this->newline, $html),
		static function ($line) {
			return '' !== trim($line);
		}
	);

	// implode() ignores the (now sparse) keys left behind by array_filter().
	$html = implode($this->newline, $nonBlankLines);
}
| 421 | + | |
/**
 * Include configured header comment in HTML content block.
 *
 * Each line that consists solely of "-->" gets a newline, a tab, the
 * configured header comment and another newline inserted in front of it.
 * Works in place on the referenced string.
 *
 * @param string $html passed by reference and overwritten with the result
 */
public function includeHeaderComment(&$html): void
{
	$headerComment = $this->headerComment;

	// A callback inserts the comment literally. Inside a preg_replace()
	// replacement string, sequences such as "$1" or "\1" in the configured
	// comment would be interpreted as back-references.
	$html = preg_replace_callback(
		'/^(-->)$/m',
		static function (array $match) use ($headerComment) {
			return "\n\t".$headerComment."\n".$match[1];
		},
		$html
	);
}
| 431 | 431 | } | 
| @@ -14,67 +14,67 @@ | ||
| 14 | 14 | */ | 
| 15 | 15 | class RemoveComments implements ManipulationInterface | 
| 16 | 16 |  { | 
| 17 | - /** | |
| 18 | - * Patterns for white-listing comments inside content. | |
| 19 | - * | |
| 20 | - * @var array | |
| 21 | - */ | |
| 22 | - protected $whiteListCommentsPatterns = []; | |
| 17 | + /** | |
| 18 | + * Patterns for white-listing comments inside content. | |
| 19 | + * | |
| 20 | + * @var array | |
| 21 | + */ | |
| 22 | + protected $whiteListCommentsPatterns = []; | |
| 23 | 23 | |
| 24 | - /** | |
| 25 | - * @param string $html The original HTML | |
| 26 | - * @param array $configuration Configuration | |
| 27 | - * | |
| 28 | - * @return string the manipulated HTML | |
| 29 | - */ | |
| 30 | - public function manipulate($html, array $configuration = []) | |
| 31 | -    { | |
| 32 | -        if (isset($configuration['keep.'])) { | |
| 33 | - $this->whiteListCommentsPatterns = $configuration['keep.']; | |
| 34 | - } | |
| 24 | + /** | |
| 25 | + * @param string $html The original HTML | |
| 26 | + * @param array $configuration Configuration | |
| 27 | + * | |
| 28 | + * @return string the manipulated HTML | |
| 29 | + */ | |
| 30 | + public function manipulate($html, array $configuration = []) | |
| 31 | +	{ | |
| 32 | +		if (isset($configuration['keep.'])) { | |
| 33 | + $this->whiteListCommentsPatterns = $configuration['keep.']; | |
| 34 | + } | |
| 35 | 35 | |
| 36 | - // match all styles, scripts and comments | |
| 37 | - $matches = []; | |
| 38 | - preg_match_all( | |
| 39 | - '/(?s)((<!--.*?-->)|(<[ \n\r]*style[^>]*>.*?<[ \n\r]*\/style[^>]*>)|(<[ \n\r]*script[^>]*>.*?<[ \n\r]*\/script[^>]*>))/im', | |
| 40 | - $html, | |
| 41 | - $matches | |
| 42 | - ); | |
| 43 | -        foreach ($matches[0] as $tag) { | |
| 44 | -            if (false === $this->keepComment($tag)) { | |
| 45 | - $html = str_replace($tag, '', $html); | |
| 46 | - } | |
| 47 | - } | |
| 36 | + // match all styles, scripts and comments | |
| 37 | + $matches = []; | |
| 38 | + preg_match_all( | |
| 39 | + '/(?s)((<!--.*?-->)|(<[ \n\r]*style[^>]*>.*?<[ \n\r]*\/style[^>]*>)|(<[ \n\r]*script[^>]*>.*?<[ \n\r]*\/script[^>]*>))/im', | |
| 40 | + $html, | |
| 41 | + $matches | |
| 42 | + ); | |
| 43 | +		foreach ($matches[0] as $tag) { | |
| 44 | +			if (false === $this->keepComment($tag)) { | |
| 45 | + $html = str_replace($tag, '', $html); | |
| 46 | + } | |
| 47 | + } | |
| 48 | 48 | |
| 49 | - return $html; | |
| 50 | - } | |
| 49 | + return $html; | |
| 50 | + } | |
| 51 | 51 | |
| 52 | - /** | |
| 53 | - * Check if a comment is defined to be kept in a pattern whiteListOfComments. | |
| 54 | - * | |
| 55 | - * @param string $commentHtml | |
| 56 | - * | |
| 57 | - * @return bool | |
| 58 | - */ | |
| 59 | - protected function keepComment($commentHtml) | |
| 60 | -    { | |
| 61 | - // if not even a comment, skip this | |
| 62 | -        if (!preg_match('/^\<\!\-\-(.*?)\-\-\>$/usi', $commentHtml)) { | |
| 63 | - return true; | |
| 64 | - } | |
| 52 | + /** | |
| 53 | + * Check if a comment is defined to be kept in a pattern whiteListOfComments. | |
| 54 | + * | |
| 55 | + * @param string $commentHtml | |
| 56 | + * | |
| 57 | + * @return bool | |
| 58 | + */ | |
| 59 | + protected function keepComment($commentHtml) | |
| 60 | +	{ | |
| 61 | + // if not even a comment, skip this | |
| 62 | +		if (!preg_match('/^\<\!\-\-(.*?)\-\-\>$/usi', $commentHtml)) { | |
| 63 | + return true; | |
| 64 | + } | |
| 65 | 65 | |
| 66 | - // if not defined in white list | |
| 67 | -        if (!empty($this->whiteListCommentsPatterns)) { | |
| 68 | -            $commentHtml = str_replace('<!--', '', $commentHtml); | |
| 69 | -            $commentHtml = str_replace('-->', '', $commentHtml); | |
| 70 | - $commentHtml = trim($commentHtml); | |
| 71 | -            foreach ($this->whiteListCommentsPatterns as $pattern) { | |
| 72 | -                if (!empty($pattern) && preg_match($pattern, $commentHtml)) { | |
| 73 | - return true; | |
| 74 | - } | |
| 75 | - } | |
| 76 | - } | |
| 66 | + // if not defined in white list | |
| 67 | +		if (!empty($this->whiteListCommentsPatterns)) { | |
| 68 | +			$commentHtml = str_replace('<!--', '', $commentHtml); | |
| 69 | +			$commentHtml = str_replace('-->', '', $commentHtml); | |
| 70 | + $commentHtml = trim($commentHtml); | |
| 71 | +			foreach ($this->whiteListCommentsPatterns as $pattern) { | |
| 72 | +				if (!empty($pattern) && preg_match($pattern, $commentHtml)) { | |
| 73 | + return true; | |
| 74 | + } | |
| 75 | + } | |
| 76 | + } | |
| 77 | 77 | |
| 78 | - return false; | |
| 79 | - } | |
| 78 | + return false; | |
| 79 | + } | |
| 80 | 80 | } | 
| @@ -14,11 +14,11 @@ | ||
| 14 | 14 | */ | 
| 15 | 15 | interface ManipulationInterface | 
| 16 | 16 |  { | 
| 17 | - /** | |
| 18 | - * @param string $html The original HTML | |
| 19 | - * @param array $configuration Configuration | |
| 20 | - * | |
| 21 | - * @return string the manipulated HTML | |
| 22 | - */ | |
| 23 | - public function manipulate($html, array $configuration = []); | |
| 17 | + /** | |
| 18 | + * @param string $html The original HTML | |
| 19 | + * @param array $configuration Configuration | |
| 20 | + * | |
| 21 | + * @return string the manipulated HTML | |
| 22 | + */ | |
| 23 | + public function manipulate($html, array $configuration = []); | |
| 24 | 24 | } | 
| @@ -14,16 +14,16 @@ | ||
| 14 | 14 | */ | 
| 15 | 15 | class RemoveGenerator implements ManipulationInterface | 
| 16 | 16 |  { | 
| 17 | - /** | |
| 18 | - * @param string $html The original HTML | |
| 19 | - * @param array $configuration Configuration | |
| 20 | - * | |
| 21 | - * @return string the manipulated HTML | |
| 22 | - */ | |
| 23 | - public function manipulate($html, array $configuration = []) | |
| 24 | -    { | |
| 25 | - $regex = '<meta name=["\']?generator["\']? [^>]+>'; | |
| 17 | + /** | |
| 18 | + * @param string $html The original HTML | |
| 19 | + * @param array $configuration Configuration | |
| 20 | + * | |
| 21 | + * @return string the manipulated HTML | |
| 22 | + */ | |
| 23 | + public function manipulate($html, array $configuration = []) | |
| 24 | +	{ | |
| 25 | + $regex = '<meta name=["\']?generator["\']? [^>]+>'; | |
| 26 | 26 | |
| 27 | -        return preg_replace('/'.$regex.'/is', '', $html); | |
| 28 | - } | |
| 27 | +		return preg_replace('/'.$regex.'/is', '', $html); | |
| 28 | + } | |
| 29 | 29 | } | 
| @@ -19,39 +19,39 @@ | ||
| 19 | 19 | */ | 
| 20 | 20 | class CleanHtmlMiddleware implements MiddlewareInterface | 
| 21 | 21 |  { | 
| 22 | - /** | |
| 23 | - * @var CleanHtmlService | |
| 24 | - */ | |
| 25 | - protected $cleanHtmlService; | |
| 26 | - | |
| 27 | - public function __construct() | |
| 28 | -    { | |
| 29 | - $this->cleanHtmlService = GeneralUtility::makeInstance(CleanHtmlService::class); | |
| 30 | - } | |
| 31 | - | |
| 32 | - /** | |
| 33 | - * Clean the HTML output. | |
| 34 | - */ | |
| 35 | - public function process(ServerRequestInterface $request, RequestHandlerInterface $handler): ResponseInterface | |
| 36 | -    { | |
| 37 | - $response = $handler->handle($request); | |
| 38 | - | |
| 39 | - if (!($response instanceof NullResponse) | |
| 40 | - && $GLOBALS['TSFE'] instanceof TypoScriptFrontendController | |
| 41 | - && false !== (bool) $GLOBALS['TSFE']->config['config']['sourceopt.']['enabled'] | |
| 42 | -        && 'text/html' == substr($response->getHeaderLine('Content-Type'), 0, 9) | |
| 43 | -        ) { | |
| 44 | - $processedHtml = $this->cleanHtmlService->clean( | |
| 45 | - $response->getBody()->__toString(), | |
| 46 | - $GLOBALS['TSFE']->config['config']['sourceopt.'] | |
| 47 | - ); | |
| 48 | - | |
| 49 | - // Replace old body with $processedHtml | |
| 50 | -            $responseBody = new Stream('php://temp', 'rw'); | |
| 51 | - $responseBody->write($processedHtml); | |
| 52 | - $response = $response->withBody($responseBody); | |
| 53 | - } | |
| 54 | - | |
| 55 | - return $response; | |
| 56 | - } | |
| 22 | + /** | |
| 23 | + * @var CleanHtmlService | |
| 24 | + */ | |
| 25 | + protected $cleanHtmlService; | |
| 26 | + | |
/**
 * Fetch the CleanHtmlService instance through GeneralUtility::makeInstance().
 */
public function __construct()
{
	$service = GeneralUtility::makeInstance(CleanHtmlService::class);

	$this->cleanHtmlService = $service;
}
| 31 | + | |
/**
 * Clean the HTML output.
 *
 * The response produced by the next handler is passed through
 * CleanHtmlService::clean() when all of the following hold: it is not a
 * NullResponse, a TypoScriptFrontendController is registered in
 * $GLOBALS['TSFE'], "config.sourceopt.enabled" is truthy and the
 * Content-Type header starts with "text/html". Otherwise the response is
 * returned untouched.
 */
public function process(ServerRequestInterface $request, RequestHandlerInterface $handler): ResponseInterface
{
	$response = $handler->handle($request);

	$tsfe = $GLOBALS['TSFE'] ?? null;
	if ($response instanceof NullResponse || !($tsfe instanceof TypoScriptFrontendController)) {
		return $response;
	}

	// Null-coalesce the lookup: the "sourceopt." branch (or its "enabled"
	// key) may be missing from the TypoScript configuration, and an
	// unguarded access raises "undefined array key" warnings.
	$config = $tsfe->config['config']['sourceopt.'] ?? [];
	if (empty($config['enabled'])) {
		return $response;
	}

	if (0 !== strpos($response->getHeaderLine('Content-Type'), 'text/html')) {
		return $response;
	}

	$processedHtml = $this->cleanHtmlService->clean(
		$response->getBody()->__toString(),
		$config
	);

	// Replace old body with $processedHtml
	$responseBody = new Stream('php://temp', 'rw');
	$responseBody->write($processedHtml);

	return $response->withBody($responseBody);
}
| 57 | 57 | } | 
| @@ -18,27 +18,27 @@ | ||
| 18 | 18 | */ | 
| 19 | 19 | class SvgStoreMiddleware implements MiddlewareInterface | 
| 20 | 20 |  { | 
| 21 | - /** | |
| 22 | - * Search/Extract/Merge SVGs @ HTML output. | |
| 23 | - */ | |
| 24 | - public function process(ServerRequestInterface $request, RequestHandlerInterface $handler): ResponseInterface | |
| 25 | -    { | |
| 26 | - $response = $handler->handle($request); | |
| 21 | + /** | |
| 22 | + * Search/Extract/Merge SVGs @ HTML output. | |
| 23 | + */ | |
| 24 | + public function process(ServerRequestInterface $request, RequestHandlerInterface $handler): ResponseInterface | |
| 25 | +	{ | |
| 26 | + $response = $handler->handle($request); | |
| 27 | 27 | |
| 28 | - if (!($response instanceof NullResponse) | |
| 29 | - && $GLOBALS['TSFE'] instanceof TypoScriptFrontendController | |
| 30 | - && false !== (bool) $GLOBALS['TSFE']->config['config']['svgstore.']['enabled'] | |
| 31 | -        && 'text/html' == substr($response->getHeaderLine('Content-Type'), 0, 9) | |
| 32 | -        ) { | |
| 33 | - $processedHtml = GeneralUtility::makeInstance(\HTML\Sourceopt\Service\SvgStoreService::class) | |
| 34 | - ->process($response->getBody()->__toString()) | |
| 35 | - ; | |
| 28 | + if (!($response instanceof NullResponse) | |
| 29 | + && $GLOBALS['TSFE'] instanceof TypoScriptFrontendController | |
| 30 | + && false !== (bool) $GLOBALS['TSFE']->config['config']['svgstore.']['enabled'] | |
| 31 | +		&& 'text/html' == substr($response->getHeaderLine('Content-Type'), 0, 9) | |
| 32 | +		) { | |
| 33 | + $processedHtml = GeneralUtility::makeInstance(\HTML\Sourceopt\Service\SvgStoreService::class) | |
| 34 | + ->process($response->getBody()->__toString()) | |
| 35 | + ; | |
| 36 | 36 | |
| 37 | -            $responseBody = new Stream('php://temp', 'rw'); | |
| 38 | - $responseBody->write($processedHtml); | |
| 39 | - $response = $response->withBody($responseBody); | |
| 40 | - } | |
| 37 | +			$responseBody = new Stream('php://temp', 'rw'); | |
| 38 | + $responseBody->write($processedHtml); | |
| 39 | + $response = $response->withBody($responseBody); | |
| 40 | + } | |
| 41 | 41 | |
| 42 | - return $response; | |
| 43 | - } | |
| 42 | + return $response; | |
| 43 | + } | |
| 44 | 44 | } | 
| @@ -45,7 +45,7 @@ discard block | ||
| 45 | 45 | } | 
| 46 | 46 | |
| 47 | 47 | // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/img#attributes | 
| 48 | -        $html['body'] = preg_replace_callback('/<img(?<pre>[^>]*)src="(?<src>\/[^"]+\.svg)"(?<post>[^>]*?)[\s\/]*>(?!\s*<\/picture>)/s', function (array $match): string { // ^[/] | |
| 48 | +        $html['body'] = preg_replace_callback('/<img(?<pre>[^>]*)src="(?<src>\/[^"]+\.svg)"(?<post>[^>]*?)[\s\/]*>(?!\s*<\/picture>)/s', function(array $match): string { // ^[/] | |
| 49 | 49 |              if (!isset($this->svgFileArr[$match['src']])) { // check usage | 
| 50 | 50 | return $match[0]; | 
| 51 | 51 | } | 
| @@ -55,7 +55,7 @@ discard block | ||
| 55 | 55 | }, $html['body']); | 
| 56 | 56 | |
| 57 | 57 | // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/object#attributes | 
| 58 | -        $html['body'] = preg_replace_callback('/<object(?<pre>[^>]*)data="(?<data>\/[^"]+\.svg)"(?<post>[^>]*?)[\s\/]*>(?:<\/object>)/s', function (array $match): string { // ^[/] | |
| 58 | +        $html['body'] = preg_replace_callback('/<object(?<pre>[^>]*)data="(?<data>\/[^"]+\.svg)"(?<post>[^>]*?)[\s\/]*>(?:<\/object>)/s', function(array $match): string { // ^[/] | |
| 59 | 59 |              if (!isset($this->svgFileArr[$match['data']])) { // check usage | 
| 60 | 60 | return $match[0]; | 
| 61 | 61 | } | 
| @@ -102,7 +102,7 @@ discard block | ||
| 102 | 102 |          $svg = preg_replace('/.*<svg|<\/svg>.*|xlink:|\s(?:(?:version|xmlns)|(?:[a-z\-]+\:[a-z\-]+))="[^"]*"/s', '', $svg); // cleanup | 
| 103 | 103 | |
| 104 | 104 | // https://developer.mozilla.org/en-US/docs/Web/SVG/Element/svg#attributes | 
| 105 | -        $svg = preg_replace_callback('/([^>]+)\s*(?=>)/s', function (array $match) use (&$attr): string { | |
| 105 | +        $svg = preg_replace_callback('/([^>]+)\s*(?=>)/s', function(array $match) use (&$attr): string { | |
| 106 | 106 |              if (false === preg_match_all('/\s(?<attr>[\w\-]+)="\s*(?<value>[^"]+)\s*"/', $match[1], $matches)) { | 
| 107 | 107 | return $match[0]; | 
| 108 | 108 | } | 
| @@ -146,7 +146,7 @@ discard block | ||
| 146 | 146 | |
| 147 | 147 | $svg = preg_replace_callback( | 
| 148 | 148 | '/<use(?<pre>.*?)(?:xlink:)?href="(?<href>\/.+?\.svg)#[^"]+"(?<post>.*?)[\s\/]*>(?:<\/use>)?/s', | 
| 149 | -            function (array $match): string { | |
| 149 | +            function(array $match): string { | |
| 150 | 150 |                  if (!isset($this->svgFileArr[$match['href']])) { // check usage | 
| 151 | 151 | return $match[0]; | 
| 152 | 152 | } | 
| @@ -13,186 +13,186 @@ | ||
| 13 | 13 | */ | 
| 14 | 14 | class SvgStoreService implements \TYPO3\CMS\Core\SingletonInterface | 
| 15 | 15 |  { | 
| 16 | - /** | |
| 17 | - * SVG-Sprite storage directory. | |
| 18 | - * | |
| 19 | - * @var string | |
| 20 | - */ | |
| 21 | - protected $outputDir = '/typo3temp/assets/svg/'; | |
| 22 | - | |
| 23 | - public function __construct() | |
| 24 | -    { | |
| 25 | - //$this->styl = []; # https://stackoverflow.com/questions/39583880/external-svg-fails-to-apply-internal-css | |
| 26 | - //$this->defs = []; # https://bugs.chromium.org/p/chromium/issues/detail?id=751733#c14 | |
| 27 | - $this->svgs = []; | |
| 28 | - | |
| 29 | - $this->sitePath = \TYPO3\CMS\Core\Core\Environment::getPublicPath(); // [^/]$ | |
| 30 | -        $this->svgCache = GeneralUtility::makeInstance(\TYPO3\CMS\Core\Cache\CacheManager::class)->getCache('svgstore'); | |
| 31 | - } | |
| 32 | - | |
| 33 | - public function process(string $html): string | |
| 34 | -    { | |
| 35 | -        $this->spritePath = $this->svgCache->get('spritePath'); | |
| 36 | -        $this->svgFileArr = $this->svgCache->get('svgFileArr'); | |
| 37 | - | |
| 38 | -        if (empty($this->spritePath) && !$this->populateCache()) { | |
| 39 | -            throw new \Exception('could not write file: '.$this->sitePath.$this->spritePath); | |
| 40 | - } | |
| 41 | - | |
| 42 | -        if (!file_exists($this->sitePath.$this->spritePath)) { | |
| 43 | -            throw new \Exception('file does not exists: '.$this->sitePath.$this->spritePath); | |
| 44 | - } | |
| 45 | - | |
| 46 | -        if (!preg_match('/(?<head>.+?<\/head>)(?<body>.+)/s', $html, $html) && 5 == \count($html)) { | |
| 47 | -            throw new \Exception('fix HTML!'); | |
| 48 | - } | |
| 49 | - | |
| 50 | - // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/img#attributes | |
| 51 | -        $html['body'] = preg_replace_callback('/<img(?<pre>[^>]*)src="(?<src>\/[^"]+\.svg)"(?<post>[^>]*?)[\s\/]*>(?!\s*<\/picture>)/s', function (array $match): string { // ^[/] | |
| 52 | -            if (!isset($this->svgFileArr[$match['src']])) { // check usage | |
| 53 | - return $match[0]; | |
| 54 | - } | |
| 55 | -            $attr = preg_replace('/\s(?:alt|ismap|loading|title|sizes|srcset|usemap|crossorigin|decoding|referrerpolicy)="[^"]*"/', '', $match['pre'].$match['post']); // cleanup | |
| 56 | - | |
| 57 | -            return sprintf('<svg %s %s><use href="%s#%s"/></svg>', $this->svgFileArr[$match['src']]['attr'], trim($attr), $this->spritePath, $this->convertFilePath($match['src'])); | |
| 58 | - }, $html['body']); | |
| 59 | - | |
| 60 | - // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/object#attributes | |
| 61 | -        $html['body'] = preg_replace_callback('/<object(?<pre>[^>]*)data="(?<data>\/[^"]+\.svg)"(?<post>[^>]*?)[\s\/]*>(?:<\/object>)/s', function (array $match): string { // ^[/] | |
| 62 | -            if (!isset($this->svgFileArr[$match['data']])) { // check usage | |
| 63 | - return $match[0]; | |
| 64 | - } | |
| 65 | -            $attr = preg_replace('/\s(?:form|name|type|usemap)="[^"]*"/', '', $match['pre'].$match['post']); // cleanup | |
| 66 | - | |
| 67 | -            return sprintf('<svg %s %s><use href="%s#%s"/></svg>', $this->svgFileArr[$match['data']]['attr'], trim($attr), $this->spritePath, $this->convertFilePath($match['data'])); | |
| 68 | - }, $html['body']); | |
| 69 | - | |
| 70 | - return $html['head'].$html['body']; | |
| 71 | - } | |
| 72 | - | |
| 73 | - private function convertFilePath(string $path): string | |
| 74 | -    { | |
| 75 | -        return preg_replace('/.svg$|[^\w\-]/', '', str_replace('/', '-', ltrim($path, '/'))); // ^[^/] | |
| 76 | - } | |
| 77 | - | |
| 78 | - private function addFileToSpriteArr(string $hash, string $path): ?array | |
| 79 | -    { | |
| 80 | -        if (1 === preg_match('/(?:;base64|i:a?i?pgf)/', $svg = file_get_contents($this->sitePath.$path))) { // noop! | |
| 81 | - return null; | |
| 82 | - } | |
| 83 | - | |
| 84 | -        if (1 === preg_match('/<(?:style|defs)|url\(/', $svg)) { | |
| 85 | - return null; // check links @ __construct | |
| 86 | - } | |
| 87 | - | |
| 88 | -        //$svg = preg_replace('/((?:id|class)=")/', '$1'.$hash.'__', $svg); // extend  IDs | |
| 89 | -        //$svg = preg_replace('/(href="|url\()#/', '$1#'.$hash.'__', $svg); // recover IDs | |
| 90 | - | |
| 91 | -        //$svg = preg_replace_callback('/<style[^>]*>(?<styl>.+?)<\/style>|<defs[^>]*>(?<defs>.+?)<\/defs>/s', function(array $match) use($hash): string { | |
| 92 | - // | |
| 93 | - // if(isset($match['styl'])) | |
| 94 | -        //    { | |
| 95 | -        //        $this->styl[] = preg_replace('/\s*(\.|#){1}(.+?)\s*\{/', '$1'.$hash.'__$2{', $match['styl']); // patch CSS # https://mathiasbynens.be/notes/css-escapes | |
| 96 | - // } | |
| 97 | - // if(isset($match['defs'])) | |
| 98 | -        //    { | |
| 99 | - // $this->defs[] = trim($match['defs']); | |
| 100 | - // } | |
| 101 | - // return ''; | |
| 102 | - //}, $svg); | |
| 103 | - | |
| 104 | - // https://developer.mozilla.org/en-US/docs/Web/SVG/Attribute/xlink:href | |
| 105 | -        $svg = preg_replace('/.*<svg|<\/svg>.*|xlink:|\s(?:(?:version|xmlns)|(?:[a-z\-]+\:[a-z\-]+))="[^"]*"/s', '', $svg); // cleanup | |
| 106 | - | |
| 107 | - // https://developer.mozilla.org/en-US/docs/Web/SVG/Element/svg#attributes | |
| 108 | -        $svg = preg_replace_callback('/([^>]+)\s*(?=>)/s', function (array $match) use (&$attr): string { | |
| 109 | -            if (false === preg_match_all('/\s(?<attr>[\w\-]+)="\s*(?<value>[^"]+)\s*"/', $match[1], $matches)) { | |
| 110 | - return $match[0]; | |
| 111 | - } | |
| 112 | -            foreach ($matches['attr'] as $index => $attribute) { | |
| 113 | -                switch ($attribute) { | |
| 114 | - case 'id': | |
| 115 | - case 'width': | |
| 116 | - case 'height': | |
| 117 | - unset($matches[0][$index]); | |
| 118 | - break; | |
| 119 | - | |
| 120 | - case 'viewBox': | |
| 121 | -                      $attr[] = sprintf('%s="%s"', $attribute, $matches['value'][$index]); // save! | |
| 122 | - // no break | |
| 123 | - default: | |
| 124 | -                      $matches[0][$index] = sprintf('%s="%s"', $attribute, $matches['value'][$index]); // cleanup | |
| 125 | - } | |
| 126 | - } | |
| 127 | - | |
| 128 | -            return implode(' ', $matches[0]); | |
| 129 | - }, $svg, 1); | |
| 130 | - | |
| 131 | -        if ($attr) { // TODO; beautify | |
| 132 | -            $this->svgs[] = sprintf('id="%s" %s', $this->convertFilePath($path), $svg); // append ID | |
| 133 | - } | |
| 134 | - | |
| 135 | -        return !$attr ?: ['attr' => implode(' ', $attr), 'hash' => $hash]; | |
| 136 | - } | |
| 137 | - | |
| 138 | - private function populateCache(): bool | |
| 139 | -    { | |
| 140 | - $storageArr = GeneralUtility::makeInstance(\TYPO3\CMS\Core\Resource\StorageRepository::class)->findAll(); | |
| 141 | -        foreach ($storageArr as $storage) { | |
| 142 | -            if ('relative' == $storage->getConfiguration()['pathType']) { | |
| 143 | - $storageArr[$storage->getUid()] = rtrim($storage->getConfiguration()['basePath'], '/'); // [^/]$ | |
| 144 | - } | |
| 145 | - } | |
| 146 | - unset($storageArr[0]); // keep! | |
| 147 | - | |
| 148 | - $svgFileArr = GeneralUtility::makeInstance(\HTML\Sourceopt\Resource\SvgFileRepository::class)->findAllByStorageUids(array_keys($storageArr)); | |
| 149 | -        foreach ($svgFileArr as $index => $row) { | |
| 150 | -            if (!$this->svgFileArr[($row['path'] = '/'.$storageArr[$row['storage']].$row['identifier'])] = $this->addFileToSpriteArr($row['sha1'], $row['path'])) { // ^[/] | |
| 151 | - unset($this->svgFileArr[$row['path']]); | |
| 152 | - } | |
| 153 | - } | |
| 154 | - | |
| 155 | - unset($storageArr); // save MEM | |
| 156 | - unset($svgFileArr); // save MEM | |
| 157 | - | |
| 158 | - $svg = preg_replace_callback( | |
| 159 | - '/<use(?<pre>.*?)(?:xlink:)?href="(?<href>\/.+?\.svg)#[^"]+"(?<post>.*?)[\s\/]*>(?:<\/use>)?/s', | |
| 160 | -            function (array $match): string { | |
| 161 | -                if (!isset($this->svgFileArr[$match['href']])) { // check usage | |
| 162 | - return $match[0]; | |
| 163 | - } | |
| 164 | -                return sprintf('<use%s href="#%s"/>', $match['pre'].$match['post'], $this->convertFilePath($match['href'])); | |
| 165 | - }, | |
| 166 | - '<svg xmlns="http://www.w3.org/2000/svg">' | |
| 167 | -            //."\n<style>\n".implode("\n", $this->styl)."\n</style>" | |
| 168 | -            //."\n<defs>\n".implode("\n", $this->defs)."\n</defs>" | |
| 169 | -            ."\n<symbol ".implode("</symbol>\n<symbol ", $this->svgs)."</symbol>\n" | |
| 170 | - .'</svg>' | |
| 171 | - ); | |
| 172 | - | |
| 173 | - //unset($this->styl); // save MEM | |
| 174 | - //unset($this->defs); // save MEM | |
| 175 | - unset($this->svgs); // save MEM | |
| 176 | - | |
| 177 | -        if (\is_int($var = $GLOBALS['TSFE']->config['config']['sourceopt.']['formatHtml']) && 1 == $var) { | |
| 178 | -            $svg = preg_replace('/[\n\r\t\v\0]|\s{2,}/', '', $svg); | |
| 179 | - } | |
| 180 | - | |
| 181 | -        $svg = preg_replace('/<([a-z]+)\s*(\/|>\s*<\/\1)>\s*/i', '', $svg); // remove emtpy | |
| 182 | -        $svg = preg_replace('/<((circle|ellipse|line|path|polygon|polyline|rect|stop|use)\s[^>]+?)\s*>\s*<\/\2>/', '<$1/>', $svg); // shorten/minify | |
| 183 | - | |
| 184 | -        if (!is_dir($this->sitePath.$this->outputDir)) { | |
| 185 | - GeneralUtility::mkdir_deep($this->sitePath.$this->outputDir); | |
| 186 | - } | |
| 187 | - | |
| 188 | -        $this->spritePath = $this->outputDir.hash('sha1', serialize($this->svgFileArr)).'.svg'; | |
| 189 | -        if (false === file_put_contents($this->sitePath.$this->spritePath, $svg)) { | |
| 190 | - return false; | |
| 191 | - } | |
| 192 | - | |
| 193 | -        $this->svgCache->set('svgFileArr', $this->svgFileArr); | |
| 194 | -        $this->svgCache->set('spritePath', $this->spritePath); | |
| 195 | - | |
| 196 | - return true; | |
| 197 | - } | |
| 16 | + /** | |
| 17 | + * SVG-Sprite storage directory. | |
| 18 | + * | |
| 19 | + * @var string | |
| 20 | + */ | |
| 21 | + protected $outputDir = '/typo3temp/assets/svg/'; | |
| 22 | + | |
/**
 * Initialise the sprite buffer, the public site path and the "svgstore" cache.
 *
 * NOTE(review): svgs, sitePath and svgCache are not declared as properties
 * in the visible part of this class - confirm whether they are declared
 * elsewhere.
 */
public function __construct()
{
	//$this->styl = []; # https://stackoverflow.com/questions/39583880/external-svg-fails-to-apply-internal-css
	//$this->defs = []; # https://bugs.chromium.org/p/chromium/issues/detail?id=751733#c14
	$this->svgs = [];

	$cacheManager = GeneralUtility::makeInstance(\TYPO3\CMS\Core\Cache\CacheManager::class);
	$this->svgCache = $cacheManager->getCache('svgstore');

	$this->sitePath = \TYPO3\CMS\Core\Core\Environment::getPublicPath(); // [^/]$
}
| 32 | + | |
/**
 * Replace <img> and <object> references to known SVG files with inline
 * <svg><use/></svg> elements that point into the generated sprite file.
 *
 * The sprite path and the SVG file map are read from the "svgstore" cache;
 * when no sprite path is cached, populateCache() is called to build it.
 * Only the part of the document after "</head>" is rewritten.
 *
 * @param string $html complete HTML document
 *
 * @return string the document with matching references replaced
 *
 * @throws \Exception when the sprite cannot be written, the sprite file is
 *                    missing, or the document has no "</head>" to split at
 */
public function process(string $html): string
{
	$this->spritePath = $this->svgCache->get('spritePath');
	$this->svgFileArr = $this->svgCache->get('svgFileArr');

	if (empty($this->spritePath) && !$this->populateCache()) {
		throw new \Exception('could not write file: '.$this->sitePath.$this->spritePath);
	}

	if (!file_exists($this->sitePath.$this->spritePath)) {
		throw new \Exception('file does not exists: '.$this->sitePath.$this->spritePath);
	}

	// Split into head and body. The former guard combined the failed match
	// with "5 == count(...)", which can never be true for an empty match
	// array, so a document without </head> slipped through and produced an
	// empty response. The match result now lives in its own variable
	// instead of overwriting the string parameter.
	if (1 !== preg_match('/(?<head>.+?<\/head>)(?<body>.+)/s', $html, $parts)) {
		throw new \Exception('fix HTML!');
	}

	// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/img#attributes
	$parts['body'] = preg_replace_callback('/<img(?<pre>[^>]*)src="(?<src>\/[^"]+\.svg)"(?<post>[^>]*?)[\s\/]*>(?!\s*<\/picture>)/s', function (array $match): string { // ^[/]
		if (!isset($this->svgFileArr[$match['src']])) { // check usage
			return $match[0];
		}
		$attr = preg_replace('/\s(?:alt|ismap|loading|title|sizes|srcset|usemap|crossorigin|decoding|referrerpolicy)="[^"]*"/', '', $match['pre'].$match['post']); // cleanup

		return sprintf('<svg %s %s><use href="%s#%s"/></svg>', $this->svgFileArr[$match['src']]['attr'], trim($attr), $this->spritePath, $this->convertFilePath($match['src']));
	}, $parts['body']);

	// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/object#attributes
	$parts['body'] = preg_replace_callback('/<object(?<pre>[^>]*)data="(?<data>\/[^"]+\.svg)"(?<post>[^>]*?)[\s\/]*>(?:<\/object>)/s', function (array $match): string { // ^[/]
		if (!isset($this->svgFileArr[$match['data']])) { // check usage
			return $match[0];
		}
		$attr = preg_replace('/\s(?:form|name|type|usemap)="[^"]*"/', '', $match['pre'].$match['post']); // cleanup

		return sprintf('<svg %s %s><use href="%s#%s"/></svg>', $this->svgFileArr[$match['data']]['attr'], trim($attr), $this->spritePath, $this->convertFilePath($match['data']));
	}, $parts['body']);

	return $parts['head'].$parts['body'];
}
| 72 | + | |
/**
 * Turn a file path into the identifier used for its symbol in the sprite.
 *
 * Leading slashes are stripped, remaining slashes become dashes, a trailing
 * ".svg" is removed and every character other than word characters and
 * dashes is dropped.
 *
 * @param string $path path of the SVG file
 *
 * @return string identifier derived from the path
 */
private function convertFilePath(string $path): string
{
	$dashed = str_replace('/', '-', ltrim($path, '/')); // ^[^/]

	// The dot is escaped so that only a literal ".svg" suffix is stripped,
	// not any character followed by "svg".
	return preg_replace('/\.svg$|[^\w\-]/', '', $dashed);
}
| 77 | + | |
/**
 * Prepare one SVG file for the sprite and queue it in $this->svgs.
 *
 * Files that cannot be read, that match the ";base64" / "i:pgf" pattern, or
 * that contain <style>, <defs> or "url(" are skipped. For the others the
 * outer <svg> wrapper, "xlink:" prefixes and version/xmlns/namespaced
 * attributes are stripped. The id, width and height attributes of the
 * opening tag are dropped and its viewBox is remembered. A file without a
 * viewBox is not added to the sprite.
 *
 * @param string $hash hash of the file, passed through into the result
 * @param string $path file path relative to the public site path
 *
 * @return array|null ['attr' => string, 'hash' => string] when the file was
 *                    queued, null when it was skipped
 */
private function addFileToSpriteArr(string $hash, string $path): ?array
{
	$svg = file_get_contents($this->sitePath.$path);
	if (false === $svg) {
		return null; // unreadable file: nothing to add
	}

	if (1 === preg_match('/(?:;base64|i:a?i?pgf)/', $svg)) { // noop!
		return null;
	}

	if (1 === preg_match('/<(?:style|defs)|url\(/', $svg)) {
		return null; // check links @ __construct
	}

	//$svg = preg_replace('/((?:id|class)=")/', '$1'.$hash.'__', $svg); // extend  IDs
	//$svg = preg_replace('/(href="|url\()#/', '$1#'.$hash.'__', $svg); // recover IDs

	//$svg = preg_replace_callback('/<style[^>]*>(?<styl>.+?)<\/style>|<defs[^>]*>(?<defs>.+?)<\/defs>/s', function(array $match) use($hash): string {
	//
	//    if(isset($match['styl']))
	//    {
	//        $this->styl[] = preg_replace('/\s*(\.|#){1}(.+?)\s*\{/', '$1'.$hash.'__$2{', $match['styl']); // patch CSS # https://mathiasbynens.be/notes/css-escapes
	//    }
	//    if(isset($match['defs']))
	//    {
	//        $this->defs[] = trim($match['defs']);
	//    }
	//    return '';
	//}, $svg);

	// https://developer.mozilla.org/en-US/docs/Web/SVG/Attribute/xlink:href
	$svg = preg_replace('/.*<svg|<\/svg>.*|xlink:|\s(?:(?:version|xmlns)|(?:[a-z\-]+\:[a-z\-]+))="[^"]*"/s', '', $svg); // cleanup

	// Collected by the callback below; initialised so that "nothing found"
	// is an empty array rather than an unset variable.
	$attr = [];

	// https://developer.mozilla.org/en-US/docs/Web/SVG/Element/svg#attributes
	// Limit 1: only the attribute list of the opening tag is rewritten.
	$svg = preg_replace_callback('/([^>]+)\s*(?=>)/s', function (array $match) use (&$attr): string {
		if (false === preg_match_all('/\s(?<attr>[\w\-]+)="\s*(?<value>[^"]+)\s*"/', $match[1], $matches)) {
			return $match[0];
		}
		foreach ($matches['attr'] as $index => $attribute) {
			switch ($attribute) {
				case 'id':
				case 'width':
				case 'height':
					unset($matches[0][$index]);
					break;

				case 'viewBox':
					$attr[] = sprintf('%s="%s"', $attribute, $matches['value'][$index]); // save!
					// no break
				default:
					$matches[0][$index] = sprintf('%s="%s"', $attribute, $matches['value'][$index]); // cleanup
			}
		}

		return implode(' ', $matches[0]);
	}, $svg, 1);

	// Previously "!$attr ?: [...]" returned true here, which violates the
	// ?array return type and raises a TypeError.
	if ([] === $attr) {
		return null;
	}

	$this->svgs[] = sprintf('id="%s" %s', $this->convertFilePath($path), $svg); // append ID

	return ['attr' => implode(' ', $attr), 'hash' => $hash];
}
| 137 | + | |
| 138 | + private function populateCache(): bool | |
| 139 | +	{ | |
| 140 | + $storageArr = GeneralUtility::makeInstance(\TYPO3\CMS\Core\Resource\StorageRepository::class)->findAll(); | |
| 141 | +		foreach ($storageArr as $storage) { | |
| 142 | +			if ('relative' == $storage->getConfiguration()['pathType']) { | |
| 143 | + $storageArr[$storage->getUid()] = rtrim($storage->getConfiguration()['basePath'], '/'); // [^/]$ | |
| 144 | + } | |
| 145 | + } | |
| 146 | + unset($storageArr[0]); // keep! | |
| 147 | + | |
| 148 | + $svgFileArr = GeneralUtility::makeInstance(\HTML\Sourceopt\Resource\SvgFileRepository::class)->findAllByStorageUids(array_keys($storageArr)); | |
| 149 | +		foreach ($svgFileArr as $index => $row) { | |
| 150 | +			if (!$this->svgFileArr[($row['path'] = '/'.$storageArr[$row['storage']].$row['identifier'])] = $this->addFileToSpriteArr($row['sha1'], $row['path'])) { // ^[/] | |
| 151 | + unset($this->svgFileArr[$row['path']]); | |
| 152 | + } | |
| 153 | + } | |
| 154 | + | |
| 155 | + unset($storageArr); // save MEM | |
| 156 | + unset($svgFileArr); // save MEM | |
| 157 | + | |
| 158 | + $svg = preg_replace_callback( | |
| 159 | + '/<use(?<pre>.*?)(?:xlink:)?href="(?<href>\/.+?\.svg)#[^"]+"(?<post>.*?)[\s\/]*>(?:<\/use>)?/s', | |
| 160 | +			function (array $match): string { | |
| 161 | +				if (!isset($this->svgFileArr[$match['href']])) { // check usage | |
| 162 | + return $match[0]; | |
| 163 | + } | |
| 164 | +				return sprintf('<use%s href="#%s"/>', $match['pre'].$match['post'], $this->convertFilePath($match['href'])); | |
| 165 | + }, | |
| 166 | + '<svg xmlns="http://www.w3.org/2000/svg">' | |
| 167 | +			//."\n<style>\n".implode("\n", $this->styl)."\n</style>" | |
| 168 | +			//."\n<defs>\n".implode("\n", $this->defs)."\n</defs>" | |
| 169 | +			."\n<symbol ".implode("</symbol>\n<symbol ", $this->svgs)."</symbol>\n" | |
| 170 | + .'</svg>' | |
| 171 | + ); | |
| 172 | + | |
| 173 | + //unset($this->styl); // save MEM | |
| 174 | + //unset($this->defs); // save MEM | |
| 175 | + unset($this->svgs); // save MEM | |
| 176 | + | |
| 177 | +		if (\is_int($var = $GLOBALS['TSFE']->config['config']['sourceopt.']['formatHtml']) && 1 == $var) { | |
| 178 | +			$svg = preg_replace('/[\n\r\t\v\0]|\s{2,}/', '', $svg); | |
| 179 | + } | |
| 180 | + | |
| 181 | +		$svg = preg_replace('/<([a-z]+)\s*(\/|>\s*<\/\1)>\s*/i', '', $svg); // remove emtpy | |
| 182 | +		$svg = preg_replace('/<((circle|ellipse|line|path|polygon|polyline|rect|stop|use)\s[^>]+?)\s*>\s*<\/\2>/', '<$1/>', $svg); // shorten/minify | |
| 183 | + | |
| 184 | +		if (!is_dir($this->sitePath.$this->outputDir)) { | |
| 185 | + GeneralUtility::mkdir_deep($this->sitePath.$this->outputDir); | |
| 186 | + } | |
| 187 | + | |
| 188 | +		$this->spritePath = $this->outputDir.hash('sha1', serialize($this->svgFileArr)).'.svg'; | |
| 189 | +		if (false === file_put_contents($this->sitePath.$this->spritePath, $svg)) { | |
| 190 | + return false; | |
| 191 | + } | |
| 192 | + | |
| 193 | +		$this->svgCache->set('svgFileArr', $this->svgFileArr); | |
| 194 | +		$this->svgCache->set('spritePath', $this->spritePath); | |
| 195 | + | |
| 196 | + return true; | |
| 197 | + } | |
| 198 | 198 | } | 
| @@ -13,46 +13,46 @@ | ||
| 13 | 13 | */ | 
| 14 | 14 | class SvgFileRepository extends \TYPO3\CMS\Core\Resource\FileRepository | 
| 15 | 15 |  { | 
| 16 | - /** | |
| 17 | - * Retrieves all used SVGs within given storage-array | |
| 18 | - * | |
| 19 | - * @param array $storageUids | |
| 20 | - * @return array | |
| 21 | - */ | |
| 22 | - public function findAllByStorageUids(array $storageUids): array | |
| 23 | -    { | |
| 24 | - return | |
| 25 | - ($queryBuilder = GeneralUtility::makeInstance(\TYPO3\CMS\Core\Database\ConnectionPool::class)->getQueryBuilderForTable($this->table)) | |
| 26 | -                ->select('sys_file.storage', 'sys_file.identifier', 'sys_file.sha1') | |
| 27 | - ->from($this->table) | |
| 28 | - ->innerJoin( | |
| 29 | - 'sys_file', | |
| 30 | - 'sys_file_reference', | |
| 31 | - 'sys_file_reference', | |
| 32 | - $queryBuilder->expr()->eq( | |
| 33 | - 'sys_file_reference.uid_local', | |
| 34 | -                        $queryBuilder->quoteIdentifier('sys_file.uid') | |
| 35 | - ) | |
| 36 | - ) | |
| 37 | - ->where( | |
| 38 | - $queryBuilder->expr()->in( | |
| 39 | - 'sys_file.storage', | |
| 40 | - $queryBuilder->createNamedParameter($storageUids, \Doctrine\DBAL\Connection::PARAM_INT_ARRAY) | |
| 41 | - ), | |
| 42 | - $queryBuilder->expr()->lt( | |
| 43 | - 'sys_file.size', | |
| 44 | - $queryBuilder->createNamedParameter((int) $GLOBALS['TSFE']->config['config']['svgstore.']['fileSize'], \PDO::PARAM_INT) | |
| 45 | - ), | |
| 46 | - $queryBuilder->expr()->eq( | |
| 47 | - 'sys_file.mime_type', | |
| 48 | -                        $queryBuilder->createNamedParameter('image/svg+xml', \PDO::PARAM_STR) | |
| 49 | - ) | |
| 50 | - ) | |
| 51 | -                ->groupBy('sys_file.uid') | |
| 52 | -                ->orderBy('sys_file.storage') | |
| 53 | -                ->addOrderBy('sys_file.identifier') | |
| 54 | - ->execute() | |
| 55 | - ->fetchAll() // TODO; use stdClass | |
| 56 | - ; | |
| 57 | - } | |
| 16 | + /** | |
| 17 | + * Retrieves all used SVGs within given storage-array | |
| 18 | + * | |
| 19 | + * @param array $storageUids | |
| 20 | + * @return array | |
| 21 | + */ | |
| 22 | + public function findAllByStorageUids(array $storageUids): array | |
| 23 | +	{ | |
| 24 | + return | |
| 25 | + ($queryBuilder = GeneralUtility::makeInstance(\TYPO3\CMS\Core\Database\ConnectionPool::class)->getQueryBuilderForTable($this->table)) | |
| 26 | +				->select('sys_file.storage', 'sys_file.identifier', 'sys_file.sha1') | |
| 27 | + ->from($this->table) | |
| 28 | + ->innerJoin( | |
| 29 | + 'sys_file', | |
| 30 | + 'sys_file_reference', | |
| 31 | + 'sys_file_reference', | |
| 32 | + $queryBuilder->expr()->eq( | |
| 33 | + 'sys_file_reference.uid_local', | |
| 34 | +						$queryBuilder->quoteIdentifier('sys_file.uid') | |
| 35 | + ) | |
| 36 | + ) | |
| 37 | + ->where( | |
| 38 | + $queryBuilder->expr()->in( | |
| 39 | + 'sys_file.storage', | |
| 40 | + $queryBuilder->createNamedParameter($storageUids, \Doctrine\DBAL\Connection::PARAM_INT_ARRAY) | |
| 41 | + ), | |
| 42 | + $queryBuilder->expr()->lt( | |
| 43 | + 'sys_file.size', | |
| 44 | + $queryBuilder->createNamedParameter((int) $GLOBALS['TSFE']->config['config']['svgstore.']['fileSize'], \PDO::PARAM_INT) | |
| 45 | + ), | |
| 46 | + $queryBuilder->expr()->eq( | |
| 47 | + 'sys_file.mime_type', | |
| 48 | +						$queryBuilder->createNamedParameter('image/svg+xml', \PDO::PARAM_STR) | |
| 49 | + ) | |
| 50 | + ) | |
| 51 | +				->groupBy('sys_file.uid') | |
| 52 | +				->orderBy('sys_file.storage') | |
| 53 | +				->addOrderBy('sys_file.identifier') | |
| 54 | + ->execute() | |
| 55 | + ->fetchAll() // TODO; use stdClass | |
| 56 | + ; | |
| 57 | + } | |
| 58 | 58 | } |