@@ -13,519 +13,519 @@ |
||
13 | 13 | class CleanHtmlService implements SingletonInterface |
14 | 14 | { |
15 | 15 | |
16 | - /** |
|
17 | - * Enable Debug comment in footer |
|
18 | - * |
|
19 | - * @var boolean |
|
20 | - */ |
|
21 | - protected $debugComment = false; |
|
22 | - |
|
23 | - /** |
|
24 | - * Format Type |
|
25 | - * |
|
26 | - * @var integer |
|
27 | - */ |
|
28 | - protected $formatType = 2; |
|
29 | - |
|
30 | - /** |
|
31 | - * Tab character |
|
32 | - * |
|
33 | - * @var string |
|
34 | - */ |
|
35 | - protected $tab = "\t"; |
|
36 | - |
|
37 | - /** |
|
38 | - * Newline character |
|
39 | - * |
|
40 | - * @var string |
|
41 | - */ |
|
42 | - protected $newline = "\n"; |
|
43 | - |
|
44 | - /** |
|
45 | - * Enable/disable UTF8 support |
|
46 | - * |
|
47 | - * @var boolean |
|
48 | - */ |
|
49 | - protected $utf8 = true; |
|
50 | - |
|
51 | - /** |
|
52 | - * Configured extra header comment |
|
53 | - * |
|
54 | - * @var string |
|
55 | - */ |
|
56 | - protected $headerComment = ''; |
|
57 | - |
|
58 | - /** |
|
59 | - * Set variables based on given config |
|
60 | - * |
|
61 | - * @param array $config |
|
62 | - * |
|
63 | - * @return void |
|
64 | - */ |
|
65 | - public function setVariables(array $config) |
|
66 | - { |
|
67 | - switch (TYPO3_OS) { // set newline |
|
68 | - case 'WIN': |
|
69 | - $this->newline = "\r\n"; |
|
70 | - break; |
|
71 | - default: |
|
72 | - $this->newline = "\n"; |
|
73 | - } |
|
74 | - |
|
75 | - if (!empty($config)) { |
|
76 | - if ($config['formatHtml'] && is_numeric($config['formatHtml'])) { |
|
77 | - $this->formatType = (int)$config['formatHtml']; |
|
78 | - } |
|
79 | - |
|
80 | - if ($config['formatHtml.']['tabSize'] && is_numeric($config['formatHtml.']['tabSize'])) { |
|
81 | - $this->tab = str_pad('', $config['formatHtml.']['tabSize'], ' '); |
|
82 | - } |
|
83 | - |
|
84 | - if (isset($config['enable_utf'])) { |
|
85 | - $this->utf8 = (bool)$config['enable_utf-8_support']; |
|
86 | - } |
|
87 | - |
|
88 | - if (isset($config['formatHtml.']['debugComment'])) { |
|
89 | - $this->debugComment = (bool)$config['formatHtml.']['debugComment']; |
|
90 | - } |
|
91 | - |
|
92 | - if (isset($config['headerComment'])) { |
|
93 | - $this->headerComment = $config['headerComment']; |
|
94 | - } |
|
95 | - } |
|
96 | - } |
|
97 | - |
|
98 | - /** |
|
99 | - * Clean given HTML with formatter |
|
100 | - * |
|
101 | - * @param string $html |
|
102 | - * @param array $config |
|
103 | - * |
|
104 | - * @return void |
|
105 | - */ |
|
106 | - public function clean(&$html, $config = []) |
|
107 | - { |
|
108 | - if (!empty($config)) { |
|
109 | - if ((bool)$config['enabled'] === false) { |
|
110 | - return; |
|
111 | - } |
|
112 | - |
|
113 | - $this->setVariables($config); |
|
114 | - } |
|
115 | - |
|
116 | - $manipulations = []; |
|
117 | - |
|
118 | - if (isset($config['removeGenerator']) && (bool)$config['removeGenerator']) { |
|
119 | - $manipulations['removeGenerator'] = GeneralUtility::makeInstance('HTML\\Sourceopt\\Manipulation\\RemoveGenerator'); |
|
120 | - } |
|
121 | - |
|
122 | - if (isset($config['removeComments']) && (bool)$config['removeComments']) { |
|
123 | - $manipulations['removeComments'] = GeneralUtility::makeInstance('HTML\\Sourceopt\\Manipulation\\RemoveComments'); |
|
124 | - } |
|
125 | - |
|
126 | - if (isset($config['removeBlurScript']) && (bool)$config['removeBlurScript']) { |
|
127 | - $manipulations['removeBlurScript'] = GeneralUtility::makeInstance('HTML\\Sourceopt\\Manipulation\\RemoveBlurScript'); |
|
128 | - } |
|
129 | - |
|
130 | - if (!empty($this->headerComment)) { |
|
131 | - $this->includeHeaderComment($html); |
|
132 | - } |
|
133 | - |
|
134 | - foreach ($manipulations as $key => $manipulation) { |
|
135 | - /** @var ManipulationInterface $manipulation */ |
|
136 | - $configuration = isset($config[$key . '.']) && is_array($config[$key . '.']) ? $config[$key . '.'] : []; |
|
137 | - $html = $manipulation->manipulate($html, $configuration); |
|
138 | - } |
|
139 | - |
|
140 | - if ($this->formatType) { |
|
141 | - $this->formatHtml($html); |
|
142 | - } |
|
143 | - } |
|
144 | - |
|
145 | - /** |
|
146 | - * Formats the (X)HTML code: |
|
147 | - * - taps according to the hirarchy of the tags |
|
148 | - * - removes empty spaces between tags |
|
149 | - * - removes linebreaks within tags (spares where necessary: pre, textarea, comments, ..) |
|
150 | - * choose from five options: |
|
151 | - * 0 => off |
|
152 | - * 1 => no line break at all (code in one line) |
|
153 | - * 2 => minimalistic line breaks (structure defining box-elements) |
|
154 | - * 3 => aesthetic line breaks (important box-elements) |
|
155 | - * 4 => logic line breaks (all box-elements) |
|
156 | - * 5 => max line breaks (all elements) |
|
157 | - * |
|
158 | - * @param string $html |
|
159 | - * |
|
160 | - * @return void |
|
161 | - */ |
|
162 | - protected function formatHtml(&$html) |
|
163 | - { |
|
164 | - // Save original formated comments, pre, textarea, styles and java-scripts & replace them with markers |
|
165 | - preg_match_all( |
|
166 | - '/(?s)((<!--.*?-->)|(<[ \n\r]*pre[^>]*>.*?<[ \n\r]*\/pre[^>]*>)|(<[ \n\r]*textarea[^>]*>.*?<[ \n\r]*\/textarea[^>]*>)|(<[ \n\r]*style[^>]*>.*?<[ \n\r]*\/style[^>]*>)|(<[ \n\r]*script[^>]*>.*?<[ \n\r]*\/script[^>]*>))/im', |
|
167 | - $html, |
|
168 | - $matches |
|
169 | - ); |
|
170 | - $noFormat = $matches[0]; // do not format these block elements |
|
171 | - for ($i = 0; $i < count($noFormat); $i++) { |
|
172 | - $html = str_replace($noFormat[$i], "\n<!-- ELEMENT $i -->", $html); |
|
173 | - } |
|
174 | - |
|
175 | - // define box elements for formatting |
|
176 | - $trueBoxElements = 'address|blockquote|center|dir|div|dl|fieldset|form|h1|h2|h3|h4|h5|h6|hr|isindex|menu|noframes|noscript|ol|p|pre|table|ul|article|aside|details|figcaption|figure|footer|header|hgroup|menu|nav|section'; |
|
177 | - $functionalBoxElements = 'dd|dt|frameset|li|tbody|td|tfoot|th|thead|tr|colgroup'; |
|
178 | - $usableBoxElements = 'applet|button|del|iframe|ins|map|object|script'; |
|
179 | - $imagineBoxElements = 'html|body|head|meta|title|link|script|base|!--'; |
|
180 | - $allBoxLikeElements = '(?>' . $trueBoxElements . '|' . $functionalBoxElements . '|' . $usableBoxElements . '|' . $imagineBoxElements . ')'; |
|
181 | - $esteticBoxLikeElements = '(?>html|head|body|meta name|title|div|table|h1|h2|h3|h4|h5|h6|p|form|pre|center|!--)'; |
|
182 | - $structureBoxLikeElements = '(?>html|head|body|div|!--)'; |
|
183 | - |
|
184 | - // split html into it's elements |
|
185 | - $htmlArrayTemp = preg_split( |
|
186 | - '/(<(?:[^<>]+(?:"[^"]*"|\'[^\']*\')?)+>)/', |
|
187 | - $html, |
|
188 | - -1, |
|
189 | - PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY |
|
190 | - ); |
|
191 | - // remove empty lines |
|
192 | - $htmlArray = ['']; |
|
193 | - $z = 1; |
|
194 | - for ($x = 0; $x < count($htmlArrayTemp); $x++) { |
|
195 | - $t = trim($htmlArrayTemp[$x]); |
|
196 | - if ($t !== '') { |
|
197 | - $htmlArray[$z] = $htmlArrayTemp[$x]; |
|
198 | - $z++; |
|
199 | - } else { |
|
200 | - $htmlArray[$z] = ' '; |
|
201 | - $z++; |
|
202 | - } |
|
203 | - } |
|
204 | - |
|
205 | - // rebuild html |
|
206 | - $html = ''; |
|
207 | - $tabs = 0; |
|
208 | - for ($x = 0; $x < count($htmlArray); $x++) { |
|
209 | - // check if the element should stand in a new line |
|
210 | - $newline = false; |
|
211 | - if (substr($htmlArray[$x - 1], 0, 5) == '<?xml') { |
|
212 | - $newline = true; |
|
213 | - } elseif ($this->formatType == 2 && ( // minimalistic line break |
|
214 | - # this element has a line break before itself |
|
215 | - preg_match( |
|
216 | - '/<' . $structureBoxLikeElements . '(.*)>/Usi', |
|
217 | - $htmlArray[$x] |
|
218 | - ) || preg_match( |
|
219 | - '/<' . $structureBoxLikeElements . '(.*) \/>/Usi', |
|
220 | - $htmlArray[$x] |
|
221 | - ) || # one element before is a element that has a line break after |
|
222 | - preg_match( |
|
223 | - '/<\/' . $structureBoxLikeElements . '(.*)>/Usi', |
|
224 | - $htmlArray[$x - 1] |
|
225 | - ) || substr( |
|
226 | - $htmlArray[$x - 1], |
|
227 | - 0, |
|
228 | - 4 |
|
229 | - ) == '<!--' || preg_match('/<' . $structureBoxLikeElements . '(.*) \/>/Usi', $htmlArray[$x - 1])) |
|
230 | - ) { |
|
231 | - $newline = true; |
|
232 | - } elseif ($this->formatType == 3 && ( // aestetic line break |
|
233 | - # this element has a line break before itself |
|
234 | - preg_match( |
|
235 | - '/<' . $esteticBoxLikeElements . '(.*)>/Usi', |
|
236 | - $htmlArray[$x] |
|
237 | - ) || preg_match( |
|
238 | - '/<' . $esteticBoxLikeElements . '(.*) \/>/Usi', |
|
239 | - $htmlArray[$x] |
|
240 | - ) || # one element before is a element that has a line break after |
|
241 | - preg_match('/<\/' . $esteticBoxLikeElements . '(.*)>/Usi', $htmlArray[$x - 1]) || substr( |
|
242 | - $htmlArray[$x - 1], |
|
243 | - 0, |
|
244 | - 4 |
|
245 | - ) == '<!--' || preg_match('/<' . $esteticBoxLikeElements . '(.*) \/>/Usi', $htmlArray[$x - 1])) |
|
246 | - ) { |
|
247 | - $newline = true; |
|
248 | - } elseif ($this->formatType >= 4 && ( // logical line break |
|
249 | - # this element has a line break before itself |
|
250 | - preg_match( |
|
251 | - '/<' . $allBoxLikeElements . '(.*)>/Usi', |
|
252 | - $htmlArray[$x] |
|
253 | - ) || preg_match( |
|
254 | - '/<' . $allBoxLikeElements . '(.*) \/>/Usi', |
|
255 | - $htmlArray[$x] |
|
256 | - ) || # one element before is a element that has a line break after |
|
257 | - preg_match('/<\/' . $allBoxLikeElements . '(.*)>/Usi', $htmlArray[$x - 1]) || substr( |
|
258 | - $htmlArray[$x - 1], |
|
259 | - 0, |
|
260 | - 4 |
|
261 | - ) == '<!--' || preg_match('/<' . $allBoxLikeElements . '(.*) \/>/Usi', $htmlArray[$x - 1])) |
|
262 | - ) { |
|
263 | - $newline = true; |
|
264 | - } |
|
265 | - |
|
266 | - // count down a tab |
|
267 | - if (substr($htmlArray[$x], 0, 2) == '</') { |
|
268 | - $tabs--; |
|
269 | - } |
|
270 | - |
|
271 | - // add tabs and line breaks in front of the current tag |
|
272 | - if ($newline) { |
|
273 | - $html .= $this->newline; |
|
274 | - for ($y = 0; $y < $tabs; $y++) { |
|
275 | - $html .= $this->tab; |
|
276 | - } |
|
277 | - } |
|
278 | - |
|
279 | - // remove white spaces and line breaks and add current tag to the html-string |
|
280 | - if (substr($htmlArray[$x - 1], 0, 4) == '<pre' // remove white space after line ending in PRE / TEXTAREA / comment |
|
281 | - || substr($htmlArray[$x - 1], 0, 9) == '<textarea' || substr($htmlArray[$x - 1], 0, 4) == '<!--' |
|
282 | - ) { |
|
283 | - $html .= $this->rTrimLines($htmlArray[$x]); |
|
284 | - } elseif (substr($htmlArray[$x], 0, 9) == '<![CDATA[' // remove multiple white space in CDATA / XML |
|
285 | - || substr($htmlArray[$x], 0, 5) == '<?xml' |
|
286 | - ) { |
|
287 | - $html .= $this->killWhiteSpace($htmlArray[$x]); |
|
288 | - } else { // remove all line breaks |
|
289 | - $html .= $this->killLineBreaks($htmlArray[$x]); |
|
290 | - } |
|
291 | - |
|
292 | - // count up a tab |
|
293 | - if (substr($htmlArray[$x], 0, 1) == '<' && substr($htmlArray[$x], 1, 1) != '/') { |
|
294 | - if (substr($htmlArray[$x], 1, 1) != ' ' && substr($htmlArray[$x], 1, 3) != 'img' && substr( |
|
295 | - $htmlArray[$x], |
|
296 | - 1, |
|
297 | - 2 |
|
298 | - ) != 'br' && substr($htmlArray[$x], 1, 2) != 'hr' && substr( |
|
299 | - $htmlArray[$x], |
|
300 | - 1, |
|
301 | - 5 |
|
302 | - ) != 'input' && substr($htmlArray[$x], 1, 4) != 'link' && substr( |
|
303 | - $htmlArray[$x], |
|
304 | - 1, |
|
305 | - 4 |
|
306 | - ) != 'meta' && substr($htmlArray[$x], 1, 4) != 'col ' && substr( |
|
307 | - $htmlArray[$x], |
|
308 | - 1, |
|
309 | - 5 |
|
310 | - ) != 'frame' && substr($htmlArray[$x], 1, 7) != 'isindex' && substr( |
|
311 | - $htmlArray[$x], |
|
312 | - 1, |
|
313 | - 5 |
|
314 | - ) != 'param' && substr($htmlArray[$x], 1, 4) != 'area' && substr( |
|
315 | - $htmlArray[$x], |
|
316 | - 1, |
|
317 | - 4 |
|
318 | - ) != 'base' && substr($htmlArray[$x], 0, 2) != '<!' && substr($htmlArray[$x], 0, 5) != '<?xml' |
|
319 | - ) { |
|
320 | - $tabs++; |
|
321 | - } |
|
322 | - } |
|
323 | - } |
|
324 | - |
|
325 | - // Remove empty lines |
|
326 | - if ($this->formatType > 1) { |
|
327 | - $this->removeEmptyLines($html); |
|
328 | - } |
|
329 | - |
|
330 | - // Restore saved comments, styles and java-scripts |
|
331 | - for ($i = 0; $i < count($noFormat); $i++) { |
|
332 | - $noFormat[$i] = $this->rTrimLines($noFormat[$i]); // remove white space after line ending |
|
333 | - $html = str_replace("<!-- ELEMENT $i -->", $noFormat[$i], $html); |
|
334 | - } |
|
335 | - |
|
336 | - // include debug comment at the end |
|
337 | - if ($tabs != 0 && $this->debugComment === true) { |
|
338 | - $html .= '<!--' . $tabs . " open elements found-->\r\n"; |
|
339 | - } |
|
340 | - } |
|
341 | - |
|
342 | - /** |
|
343 | - * Remove ALL line breaks and multiple white space |
|
344 | - * |
|
345 | - * @param string $html |
|
346 | - * |
|
347 | - * @return string |
|
348 | - */ |
|
349 | - protected function killLineBreaks($html) |
|
350 | - { |
|
351 | - $html = $this->convNlOs($html); |
|
352 | - $html = str_replace($this->newline, "", $html); |
|
353 | - // remove double empty spaces |
|
354 | - if ($this->utf8 == true) { |
|
355 | - $html = preg_replace('/\s\s+/u', ' ', $html); |
|
356 | - } else { |
|
357 | - $html = preg_replace('/\s\s+/', ' ', $html); |
|
358 | - } |
|
359 | - return $html; |
|
360 | - } |
|
361 | - |
|
362 | - /** |
|
363 | - * Remove multiple white space, keeps line breaks |
|
364 | - * |
|
365 | - * @param string $html |
|
366 | - * |
|
367 | - * @return string |
|
368 | - */ |
|
369 | - protected function killWhiteSpace($html) |
|
370 | - { |
|
371 | - $html = $this->convNlOs($html); |
|
372 | - $temp = explode($this->newline, $html); |
|
373 | - for ($i = 0; $i < count($temp); $i++) { |
|
374 | - if (!trim($temp[$i])) { |
|
375 | - unset($temp[$i]); |
|
376 | - } else { |
|
377 | - $temp[$i] = trim($temp[$i]); |
|
378 | - $temp[$i] = preg_replace('/\s\s+/', ' ', $temp[$i]); |
|
379 | - } |
|
380 | - } |
|
381 | - $html = implode($this->newline, $temp); |
|
382 | - return $html; |
|
383 | - } |
|
384 | - |
|
385 | - /** |
|
386 | - * Remove white space at the end of lines, keeps other white space and line breaks |
|
387 | - * |
|
388 | - * @param string $html |
|
389 | - * |
|
390 | - * @return string |
|
391 | - */ |
|
392 | - protected function rTrimLines($html) |
|
393 | - { |
|
394 | - $html = $this->convNlOs($html); |
|
395 | - $temp = explode($this->newline, $html); |
|
396 | - for ($i = 0; $i < count($temp); $i++) { |
|
397 | - $temp[$i] = rtrim($temp[$i]); |
|
398 | - } |
|
399 | - $html = implode($this->newline, $temp); |
|
400 | - return $html; |
|
401 | - } |
|
402 | - |
|
403 | - /** |
|
404 | - * Convert newlines according to the current OS |
|
405 | - * |
|
406 | - * @param string $html |
|
407 | - * |
|
408 | - * @return string |
|
409 | - */ |
|
410 | - protected function convNlOs($html) |
|
411 | - { |
|
412 | - $html = preg_replace("(\r\n|\n|\r)", $this->newline, $html); |
|
413 | - return $html; |
|
414 | - } |
|
415 | - |
|
416 | - /** |
|
417 | - * Remove tabs and empty spaces before and after lines, transforms linebreaks system conform |
|
418 | - * |
|
419 | - * @param string $html Html-Code |
|
420 | - * |
|
421 | - * @return void |
|
422 | - */ |
|
423 | - protected function trimLines(&$html) |
|
424 | - { |
|
425 | - $html = str_replace("\t", "", $html); |
|
426 | - // convert newlines according to the current OS |
|
427 | - if (TYPO3_OS == "WIN") { |
|
428 | - $html = str_replace("\n", "\r\n", $html); |
|
429 | - } else { |
|
430 | - $html = str_replace("\r\n", "\n", $html); |
|
431 | - } |
|
432 | - $temp = explode($this->newline, $html); |
|
433 | - $temp = array_map('trim', $temp); |
|
434 | - $html = implode($this->newline, $temp); |
|
435 | - unset($temp); |
|
436 | - } |
|
437 | - |
|
438 | - /** |
|
439 | - * Remove empty lines |
|
440 | - * |
|
441 | - * @param string $html |
|
442 | - * |
|
443 | - * @return void |
|
444 | - */ |
|
445 | - protected function removeEmptyLines(&$html) |
|
446 | - { |
|
447 | - $temp = explode($this->newline, $html); |
|
448 | - $result = []; |
|
449 | - for ($i = 0; $i < count($temp); ++$i) { |
|
450 | - if ("" == trim($temp[$i])) { |
|
451 | - continue; |
|
452 | - } |
|
453 | - $result[] = $temp[$i]; |
|
454 | - } |
|
455 | - $html = implode($this->newline, $result); |
|
456 | - } |
|
457 | - |
|
458 | - /** |
|
459 | - * Remove new lines where unnecessary |
|
460 | - * spares line breaks within: pre, textarea, ... |
|
461 | - * |
|
462 | - * @param string $html |
|
463 | - * |
|
464 | - * @return void |
|
465 | - */ |
|
466 | - protected function removeNewLines(&$html) |
|
467 | - { |
|
468 | - $splitArray = [ |
|
469 | - 'textarea', |
|
470 | - 'pre' |
|
471 | - ]; // eventuell auch: span, script, style |
|
472 | - $peaces = preg_split('#(<(' . implode('|', $splitArray) . ').*>.*</\2>)#Uis', $html, -1, PREG_SPLIT_DELIM_CAPTURE); |
|
473 | - $html = ""; |
|
474 | - for ($i = 0; $i < count($peaces); $i++) { |
|
475 | - if (($i + 1) % 3 == 0) { |
|
476 | - continue; |
|
477 | - } |
|
478 | - $html .= (($i - 1) % 3 != 0) ? $this->killLineBreaks($peaces[$i]) : $peaces[$i]; |
|
479 | - } |
|
480 | - } |
|
481 | - |
|
482 | - /** |
|
483 | - * Remove obsolete link schema |
|
484 | - * |
|
485 | - * @param string $html |
|
486 | - * |
|
487 | - * @return void |
|
488 | - */ |
|
489 | - protected function removeLinkSchema(&$html) |
|
490 | - { |
|
491 | - $html = preg_replace("/<link rel=\"?schema.dc\"?.+?>/is", "", $html); |
|
492 | - } |
|
493 | - |
|
494 | - /** |
|
495 | - * Remove empty alt tags |
|
496 | - * |
|
497 | - * @param string $html |
|
498 | - * |
|
499 | - * @return void |
|
500 | - */ |
|
501 | - protected function removeEmptyAltAtr(&$html) |
|
502 | - { |
|
503 | - $html = str_replace("alt=\"\"", "", $html); |
|
504 | - } |
|
505 | - |
|
506 | - /** |
|
507 | - * Remove broken links in <a> tags |
|
508 | - * |
|
509 | - * @param string $html |
|
510 | - * |
|
511 | - * @return void |
|
512 | - */ |
|
513 | - protected function removeRealUrlBrokenRootLink(&$html) |
|
514 | - { |
|
515 | - $html = str_replace('href=".html"', 'href=""', $html); |
|
516 | - } |
|
517 | - |
|
518 | - /** |
|
519 | - * Include configured header comment in HTML content block |
|
520 | - * |
|
521 | - * @param $html |
|
522 | - */ |
|
523 | - public function includeHeaderComment(&$html) |
|
524 | - { |
|
525 | - if (!empty($this->headerComment)) { |
|
526 | - $html = preg_replace_callback('/<meta http-equiv(.*)>/Usi', function ($matches) { |
|
527 | - return trim($matches[0] . $this->newline . $this->tab . $this->tab . '<!-- ' . $this->headerComment . '-->'); |
|
528 | - }, $html, 1); |
|
529 | - } |
|
530 | - } |
|
16 | + /** |
|
17 | + * Enable Debug comment in footer |
|
18 | + * |
|
19 | + * @var boolean |
|
20 | + */ |
|
21 | + protected $debugComment = false; |
|
22 | + |
|
23 | + /** |
|
24 | + * Format Type |
|
25 | + * |
|
26 | + * @var integer |
|
27 | + */ |
|
28 | + protected $formatType = 2; |
|
29 | + |
|
30 | + /** |
|
31 | + * Tab character |
|
32 | + * |
|
33 | + * @var string |
|
34 | + */ |
|
35 | + protected $tab = "\t"; |
|
36 | + |
|
37 | + /** |
|
38 | + * Newline character |
|
39 | + * |
|
40 | + * @var string |
|
41 | + */ |
|
42 | + protected $newline = "\n"; |
|
43 | + |
|
44 | + /** |
|
45 | + * Enable/disable UTF8 support |
|
46 | + * |
|
47 | + * @var boolean |
|
48 | + */ |
|
49 | + protected $utf8 = true; |
|
50 | + |
|
51 | + /** |
|
52 | + * Configured extra header comment |
|
53 | + * |
|
54 | + * @var string |
|
55 | + */ |
|
56 | + protected $headerComment = ''; |
|
57 | + |
|
58 | + /** |
|
59 | + * Set variables based on given config |
|
60 | + * |
|
61 | + * @param array $config |
|
62 | + * |
|
63 | + * @return void |
|
64 | + */ |
|
65 | + public function setVariables(array $config) |
|
66 | + { |
|
67 | + switch (TYPO3_OS) { // set newline |
|
68 | + case 'WIN': |
|
69 | + $this->newline = "\r\n"; |
|
70 | + break; |
|
71 | + default: |
|
72 | + $this->newline = "\n"; |
|
73 | + } |
|
74 | + |
|
75 | + if (!empty($config)) { |
|
76 | + if ($config['formatHtml'] && is_numeric($config['formatHtml'])) { |
|
77 | + $this->formatType = (int)$config['formatHtml']; |
|
78 | + } |
|
79 | + |
|
80 | + if ($config['formatHtml.']['tabSize'] && is_numeric($config['formatHtml.']['tabSize'])) { |
|
81 | + $this->tab = str_pad('', $config['formatHtml.']['tabSize'], ' '); |
|
82 | + } |
|
83 | + |
|
84 | + if (isset($config['enable_utf'])) { |
|
85 | + $this->utf8 = (bool)$config['enable_utf-8_support']; |
|
86 | + } |
|
87 | + |
|
88 | + if (isset($config['formatHtml.']['debugComment'])) { |
|
89 | + $this->debugComment = (bool)$config['formatHtml.']['debugComment']; |
|
90 | + } |
|
91 | + |
|
92 | + if (isset($config['headerComment'])) { |
|
93 | + $this->headerComment = $config['headerComment']; |
|
94 | + } |
|
95 | + } |
|
96 | + } |
|
97 | + |
|
98 | + /** |
|
99 | + * Clean given HTML with formatter |
|
100 | + * |
|
101 | + * @param string $html |
|
102 | + * @param array $config |
|
103 | + * |
|
104 | + * @return void |
|
105 | + */ |
|
106 | + public function clean(&$html, $config = []) |
|
107 | + { |
|
108 | + if (!empty($config)) { |
|
109 | + if ((bool)$config['enabled'] === false) { |
|
110 | + return; |
|
111 | + } |
|
112 | + |
|
113 | + $this->setVariables($config); |
|
114 | + } |
|
115 | + |
|
116 | + $manipulations = []; |
|
117 | + |
|
118 | + if (isset($config['removeGenerator']) && (bool)$config['removeGenerator']) { |
|
119 | + $manipulations['removeGenerator'] = GeneralUtility::makeInstance('HTML\\Sourceopt\\Manipulation\\RemoveGenerator'); |
|
120 | + } |
|
121 | + |
|
122 | + if (isset($config['removeComments']) && (bool)$config['removeComments']) { |
|
123 | + $manipulations['removeComments'] = GeneralUtility::makeInstance('HTML\\Sourceopt\\Manipulation\\RemoveComments'); |
|
124 | + } |
|
125 | + |
|
126 | + if (isset($config['removeBlurScript']) && (bool)$config['removeBlurScript']) { |
|
127 | + $manipulations['removeBlurScript'] = GeneralUtility::makeInstance('HTML\\Sourceopt\\Manipulation\\RemoveBlurScript'); |
|
128 | + } |
|
129 | + |
|
130 | + if (!empty($this->headerComment)) { |
|
131 | + $this->includeHeaderComment($html); |
|
132 | + } |
|
133 | + |
|
134 | + foreach ($manipulations as $key => $manipulation) { |
|
135 | + /** @var ManipulationInterface $manipulation */ |
|
136 | + $configuration = isset($config[$key . '.']) && is_array($config[$key . '.']) ? $config[$key . '.'] : []; |
|
137 | + $html = $manipulation->manipulate($html, $configuration); |
|
138 | + } |
|
139 | + |
|
140 | + if ($this->formatType) { |
|
141 | + $this->formatHtml($html); |
|
142 | + } |
|
143 | + } |
|
144 | + |
|
145 | + /** |
|
146 | + * Formats the (X)HTML code: |
|
147 | + * - taps according to the hirarchy of the tags |
|
148 | + * - removes empty spaces between tags |
|
149 | + * - removes linebreaks within tags (spares where necessary: pre, textarea, comments, ..) |
|
150 | + * choose from five options: |
|
151 | + * 0 => off |
|
152 | + * 1 => no line break at all (code in one line) |
|
153 | + * 2 => minimalistic line breaks (structure defining box-elements) |
|
154 | + * 3 => aesthetic line breaks (important box-elements) |
|
155 | + * 4 => logic line breaks (all box-elements) |
|
156 | + * 5 => max line breaks (all elements) |
|
157 | + * |
|
158 | + * @param string $html |
|
159 | + * |
|
160 | + * @return void |
|
161 | + */ |
|
162 | + protected function formatHtml(&$html) |
|
163 | + { |
|
164 | + // Save original formated comments, pre, textarea, styles and java-scripts & replace them with markers |
|
165 | + preg_match_all( |
|
166 | + '/(?s)((<!--.*?-->)|(<[ \n\r]*pre[^>]*>.*?<[ \n\r]*\/pre[^>]*>)|(<[ \n\r]*textarea[^>]*>.*?<[ \n\r]*\/textarea[^>]*>)|(<[ \n\r]*style[^>]*>.*?<[ \n\r]*\/style[^>]*>)|(<[ \n\r]*script[^>]*>.*?<[ \n\r]*\/script[^>]*>))/im', |
|
167 | + $html, |
|
168 | + $matches |
|
169 | + ); |
|
170 | + $noFormat = $matches[0]; // do not format these block elements |
|
171 | + for ($i = 0; $i < count($noFormat); $i++) { |
|
172 | + $html = str_replace($noFormat[$i], "\n<!-- ELEMENT $i -->", $html); |
|
173 | + } |
|
174 | + |
|
175 | + // define box elements for formatting |
|
176 | + $trueBoxElements = 'address|blockquote|center|dir|div|dl|fieldset|form|h1|h2|h3|h4|h5|h6|hr|isindex|menu|noframes|noscript|ol|p|pre|table|ul|article|aside|details|figcaption|figure|footer|header|hgroup|menu|nav|section'; |
|
177 | + $functionalBoxElements = 'dd|dt|frameset|li|tbody|td|tfoot|th|thead|tr|colgroup'; |
|
178 | + $usableBoxElements = 'applet|button|del|iframe|ins|map|object|script'; |
|
179 | + $imagineBoxElements = 'html|body|head|meta|title|link|script|base|!--'; |
|
180 | + $allBoxLikeElements = '(?>' . $trueBoxElements . '|' . $functionalBoxElements . '|' . $usableBoxElements . '|' . $imagineBoxElements . ')'; |
|
181 | + $esteticBoxLikeElements = '(?>html|head|body|meta name|title|div|table|h1|h2|h3|h4|h5|h6|p|form|pre|center|!--)'; |
|
182 | + $structureBoxLikeElements = '(?>html|head|body|div|!--)'; |
|
183 | + |
|
184 | + // split html into it's elements |
|
185 | + $htmlArrayTemp = preg_split( |
|
186 | + '/(<(?:[^<>]+(?:"[^"]*"|\'[^\']*\')?)+>)/', |
|
187 | + $html, |
|
188 | + -1, |
|
189 | + PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY |
|
190 | + ); |
|
191 | + // remove empty lines |
|
192 | + $htmlArray = ['']; |
|
193 | + $z = 1; |
|
194 | + for ($x = 0; $x < count($htmlArrayTemp); $x++) { |
|
195 | + $t = trim($htmlArrayTemp[$x]); |
|
196 | + if ($t !== '') { |
|
197 | + $htmlArray[$z] = $htmlArrayTemp[$x]; |
|
198 | + $z++; |
|
199 | + } else { |
|
200 | + $htmlArray[$z] = ' '; |
|
201 | + $z++; |
|
202 | + } |
|
203 | + } |
|
204 | + |
|
205 | + // rebuild html |
|
206 | + $html = ''; |
|
207 | + $tabs = 0; |
|
208 | + for ($x = 0; $x < count($htmlArray); $x++) { |
|
209 | + // check if the element should stand in a new line |
|
210 | + $newline = false; |
|
211 | + if (substr($htmlArray[$x - 1], 0, 5) == '<?xml') { |
|
212 | + $newline = true; |
|
213 | + } elseif ($this->formatType == 2 && ( // minimalistic line break |
|
214 | + # this element has a line break before itself |
|
215 | + preg_match( |
|
216 | + '/<' . $structureBoxLikeElements . '(.*)>/Usi', |
|
217 | + $htmlArray[$x] |
|
218 | + ) || preg_match( |
|
219 | + '/<' . $structureBoxLikeElements . '(.*) \/>/Usi', |
|
220 | + $htmlArray[$x] |
|
221 | + ) || # one element before is a element that has a line break after |
|
222 | + preg_match( |
|
223 | + '/<\/' . $structureBoxLikeElements . '(.*)>/Usi', |
|
224 | + $htmlArray[$x - 1] |
|
225 | + ) || substr( |
|
226 | + $htmlArray[$x - 1], |
|
227 | + 0, |
|
228 | + 4 |
|
229 | + ) == '<!--' || preg_match('/<' . $structureBoxLikeElements . '(.*) \/>/Usi', $htmlArray[$x - 1])) |
|
230 | + ) { |
|
231 | + $newline = true; |
|
232 | + } elseif ($this->formatType == 3 && ( // aestetic line break |
|
233 | + # this element has a line break before itself |
|
234 | + preg_match( |
|
235 | + '/<' . $esteticBoxLikeElements . '(.*)>/Usi', |
|
236 | + $htmlArray[$x] |
|
237 | + ) || preg_match( |
|
238 | + '/<' . $esteticBoxLikeElements . '(.*) \/>/Usi', |
|
239 | + $htmlArray[$x] |
|
240 | + ) || # one element before is a element that has a line break after |
|
241 | + preg_match('/<\/' . $esteticBoxLikeElements . '(.*)>/Usi', $htmlArray[$x - 1]) || substr( |
|
242 | + $htmlArray[$x - 1], |
|
243 | + 0, |
|
244 | + 4 |
|
245 | + ) == '<!--' || preg_match('/<' . $esteticBoxLikeElements . '(.*) \/>/Usi', $htmlArray[$x - 1])) |
|
246 | + ) { |
|
247 | + $newline = true; |
|
248 | + } elseif ($this->formatType >= 4 && ( // logical line break |
|
249 | + # this element has a line break before itself |
|
250 | + preg_match( |
|
251 | + '/<' . $allBoxLikeElements . '(.*)>/Usi', |
|
252 | + $htmlArray[$x] |
|
253 | + ) || preg_match( |
|
254 | + '/<' . $allBoxLikeElements . '(.*) \/>/Usi', |
|
255 | + $htmlArray[$x] |
|
256 | + ) || # one element before is a element that has a line break after |
|
257 | + preg_match('/<\/' . $allBoxLikeElements . '(.*)>/Usi', $htmlArray[$x - 1]) || substr( |
|
258 | + $htmlArray[$x - 1], |
|
259 | + 0, |
|
260 | + 4 |
|
261 | + ) == '<!--' || preg_match('/<' . $allBoxLikeElements . '(.*) \/>/Usi', $htmlArray[$x - 1])) |
|
262 | + ) { |
|
263 | + $newline = true; |
|
264 | + } |
|
265 | + |
|
266 | + // count down a tab |
|
267 | + if (substr($htmlArray[$x], 0, 2) == '</') { |
|
268 | + $tabs--; |
|
269 | + } |
|
270 | + |
|
271 | + // add tabs and line breaks in front of the current tag |
|
272 | + if ($newline) { |
|
273 | + $html .= $this->newline; |
|
274 | + for ($y = 0; $y < $tabs; $y++) { |
|
275 | + $html .= $this->tab; |
|
276 | + } |
|
277 | + } |
|
278 | + |
|
279 | + // remove white spaces and line breaks and add current tag to the html-string |
|
280 | + if (substr($htmlArray[$x - 1], 0, 4) == '<pre' // remove white space after line ending in PRE / TEXTAREA / comment |
|
281 | + || substr($htmlArray[$x - 1], 0, 9) == '<textarea' || substr($htmlArray[$x - 1], 0, 4) == '<!--' |
|
282 | + ) { |
|
283 | + $html .= $this->rTrimLines($htmlArray[$x]); |
|
284 | + } elseif (substr($htmlArray[$x], 0, 9) == '<![CDATA[' // remove multiple white space in CDATA / XML |
|
285 | + || substr($htmlArray[$x], 0, 5) == '<?xml' |
|
286 | + ) { |
|
287 | + $html .= $this->killWhiteSpace($htmlArray[$x]); |
|
288 | + } else { // remove all line breaks |
|
289 | + $html .= $this->killLineBreaks($htmlArray[$x]); |
|
290 | + } |
|
291 | + |
|
292 | + // count up a tab |
|
293 | + if (substr($htmlArray[$x], 0, 1) == '<' && substr($htmlArray[$x], 1, 1) != '/') { |
|
294 | + if (substr($htmlArray[$x], 1, 1) != ' ' && substr($htmlArray[$x], 1, 3) != 'img' && substr( |
|
295 | + $htmlArray[$x], |
|
296 | + 1, |
|
297 | + 2 |
|
298 | + ) != 'br' && substr($htmlArray[$x], 1, 2) != 'hr' && substr( |
|
299 | + $htmlArray[$x], |
|
300 | + 1, |
|
301 | + 5 |
|
302 | + ) != 'input' && substr($htmlArray[$x], 1, 4) != 'link' && substr( |
|
303 | + $htmlArray[$x], |
|
304 | + 1, |
|
305 | + 4 |
|
306 | + ) != 'meta' && substr($htmlArray[$x], 1, 4) != 'col ' && substr( |
|
307 | + $htmlArray[$x], |
|
308 | + 1, |
|
309 | + 5 |
|
310 | + ) != 'frame' && substr($htmlArray[$x], 1, 7) != 'isindex' && substr( |
|
311 | + $htmlArray[$x], |
|
312 | + 1, |
|
313 | + 5 |
|
314 | + ) != 'param' && substr($htmlArray[$x], 1, 4) != 'area' && substr( |
|
315 | + $htmlArray[$x], |
|
316 | + 1, |
|
317 | + 4 |
|
318 | + ) != 'base' && substr($htmlArray[$x], 0, 2) != '<!' && substr($htmlArray[$x], 0, 5) != '<?xml' |
|
319 | + ) { |
|
320 | + $tabs++; |
|
321 | + } |
|
322 | + } |
|
323 | + } |
|
324 | + |
|
325 | + // Remove empty lines |
|
326 | + if ($this->formatType > 1) { |
|
327 | + $this->removeEmptyLines($html); |
|
328 | + } |
|
329 | + |
|
330 | + // Restore saved comments, styles and java-scripts |
|
331 | + for ($i = 0; $i < count($noFormat); $i++) { |
|
332 | + $noFormat[$i] = $this->rTrimLines($noFormat[$i]); // remove white space after line ending |
|
333 | + $html = str_replace("<!-- ELEMENT $i -->", $noFormat[$i], $html); |
|
334 | + } |
|
335 | + |
|
336 | + // include debug comment at the end |
|
337 | + if ($tabs != 0 && $this->debugComment === true) { |
|
338 | + $html .= '<!--' . $tabs . " open elements found-->\r\n"; |
|
339 | + } |
|
340 | + } |
|
341 | + |
|
/**
 * Remove ALL line breaks and collapse every run of two or more
 * white space characters into a single space.
 *
 * Line endings are first normalised through convNlOs() so that the
 * configured newline sequence is the only one left to strip.
 *
 * @param string $html
 *
 * @return string
 */
protected function killLineBreaks($html)
{
    $html = $this->convNlOs($html);
    $html = str_replace($this->newline, '', $html);

    // remove double empty spaces
    if ($this->utf8) {
        $collapsed = preg_replace('/\s\s+/u', ' ', $html);
        if ($collapsed !== null) {
            return $collapsed;
        }
        // preg_replace() yields null when the subject is not valid UTF-8;
        // fall through to the byte-wise pattern instead of discarding the markup.
    }

    $collapsed = preg_replace('/\s\s+/', ' ', $html);

    // Never hand null back to the caller: keep the uncollapsed markup on PCRE failure.
    return $collapsed === null ? $html : $collapsed;
}
|
361 | + |
|
/**
 * Remove multiple white space, keeps line breaks.
 *
 * Every line is trimmed, lines that are empty after trimming are dropped,
 * and runs of two or more white space characters inside a line are
 * collapsed into a single space.
 *
 * @param string $html
 *
 * @return string
 */
protected function killWhiteSpace($html)
{
    $html = $this->convNlOs($html);

    $lines = [];
    // Build a fresh list instead of unsetting entries while counting the
    // same array: shrinking the array inside a count()-bounded loop ends the
    // loop early and leaves the trailing lines unprocessed.
    foreach (explode($this->newline, $html) as $line) {
        $line = trim($line);
        // Strict comparison so a line consisting only of "0" is kept.
        if ($line === '') {
            continue;
        }
        $lines[] = preg_replace('/\s\s+/', ' ', $line);
    }

    return implode($this->newline, $lines);
}
|
384 | + |
|
/**
 * Strip trailing white space from every line while leaving leading
 * white space, inner white space and the line breaks themselves intact.
 *
 * @param string $html
 *
 * @return string
 */
protected function rTrimLines($html)
{
    // Normalise line endings first so splitting on the configured newline is reliable.
    $lines = explode($this->newline, $this->convNlOs($html));

    return implode($this->newline, array_map('rtrim', $lines));
}
|
402 | + |
|
/**
 * Replace every line ending (CRLF, LF or CR) with the configured
 * newline sequence held in $this->newline.
 *
 * @param string $html
 *
 * @return string
 */
protected function convNlOs($html)
{
    // Parentheses act as the pattern delimiters here; CRLF is listed first so
    // a Windows line ending is replaced as one unit rather than as two.
    $anyLineEnding = "(\r\n|\n|\r)";

    return preg_replace($anyLineEnding, $this->newline, $html);
}
|
415 | + |
|
/**
 * Remove all tab characters, normalise line endings to the configured
 * newline and trim white space from both ends of every line.
 *
 * @param string $html Html-Code (modified in place)
 *
 * @return void
 */
protected function trimLines(&$html)
{
    $html = str_replace("\t", '', $html);

    // Normalise through convNlOs() rather than branching on the global
    // TYPO3_OS constant: the split below uses $this->newline, so both must
    // agree, and convNlOs() also covers lone "\r" line endings.
    $html = $this->convNlOs($html);

    $lines = array_map('trim', explode($this->newline, $html));
    $html = implode($this->newline, $lines);
}
|
437 | + |
|
/**
 * Drop every line that is empty or consists of white space only.
 *
 * The text is split on the configured newline; remaining lines are
 * joined with the same sequence and written back to $html.
 *
 * @param string $html
 *
 * @return void
 */
protected function removeEmptyLines(&$html)
{
    $nonEmptyLines = array_filter(
        explode($this->newline, $html),
        function ($line) {
            return trim($line) !== '';
        }
    );

    // implode() ignores the gaps array_filter() leaves in the keys.
    $html = implode($this->newline, $nonEmptyLines);
}
|
457 | + |
|
/**
 * Remove new lines where unnecessary.
 * Spares line breaks within: pre, textarea, ...
 *
 * The markup is split around the protected elements; line breaks are
 * removed from the text in between, while the protected elements are
 * copied through verbatim. If the split fails, $html is left untouched.
 *
 * @param string $html
 *
 * @return void
 */
protected function removeNewLines(&$html)
{
    $splitArray = [
        'textarea',
        'pre'
    ]; // possibly also: span, script, style

    $pieces = preg_split(
        '#(<(' . implode('|', $splitArray) . ').*>.*</\2>)#Uis',
        $html,
        -1,
        PREG_SPLIT_DELIM_CAPTURE
    );

    // preg_split() returns false on a PCRE error (e.g. backtrack limit hit on
    // a very large page). Keep the markup as it is rather than emptying it.
    if ($pieces === false) {
        return;
    }

    // With two capture groups the result repeats in triples:
    //   0 => text between protected elements
    //   1 => a complete protected element
    //   2 => the tag name of that element (already part of 1)
    $result = '';
    foreach ($pieces as $index => $piece) {
        switch ($index % 3) {
            case 0:
                $result .= $this->killLineBreaks($piece);
                break;
            case 1:
                $result .= $piece;
                break;
            default:
                // captured tag name only, nothing to output
                break;
        }
    }

    $html = $result;
}
|
481 | + |
|
/**
 * Strip <link rel="schema.dc" ...> tags from the markup.
 *
 * Matching is case-insensitive, the quotes around the rel value are
 * optional, and each match runs up to the first following ">".
 *
 * @param string $html
 *
 * @return void
 */
protected function removeLinkSchema(&$html)
{
    $schemaLinkPattern = '/<link rel="?schema.dc"?.+?>/is';

    $html = preg_replace($schemaLinkPattern, '', $html);
}
|
493 | + |
|
/**
 * Remove every literal occurrence of an empty alt attribute (alt="").
 *
 * This is a plain string replacement; surrounding white space is kept.
 *
 * @param string $html
 *
 * @return void
 */
protected function removeEmptyAltAtr(&$html)
{
    $emptyAlt = 'alt=""';

    $html = str_replace($emptyAlt, '', $html);
}
|
505 | + |
|
/**
 * Repair broken root links: every href=".html" is rewritten to href="".
 *
 * @param string $html
 *
 * @return void
 */
protected function removeRealUrlBrokenRootLink(&$html)
{
    $brokenHref = 'href=".html"';
    $emptyHref = 'href=""';

    $html = str_replace($brokenHref, $emptyHref, $html);
}
|
517 | + |
|
/**
 * Insert the configured header comment after the first
 * <meta http-equiv ...> tag of the HTML content block.
 *
 * Does nothing when no header comment is configured.
 *
 * @param string $html
 *
 * @return void
 */
public function includeHeaderComment(&$html)
{
    if (empty($this->headerComment)) {
        return;
    }

    // Appends newline, two indentation units and the comment to the matched tag.
    $appendComment = function ($matches) {
        $indent = $this->tab . $this->tab;
        $comment = '<!-- ' . $this->headerComment . '-->';

        return trim($matches[0] . $this->newline . $indent . $comment);
    };

    // The limit of 1 restricts the insertion to the first matching meta tag.
    $html = preg_replace_callback('/<meta http-equiv(.*)>/Usi', $appendComment, $html, 1);
}
|
531 | 531 | } |
@@ -74,7 +74,7 @@ discard block |
||
74 | 74 | |
75 | 75 | if (!empty($config)) { |
76 | 76 | if ($config['formatHtml'] && is_numeric($config['formatHtml'])) { |
77 | - $this->formatType = (int)$config['formatHtml']; |
|
77 | + $this->formatType = (int) $config['formatHtml']; |
|
78 | 78 | } |
79 | 79 | |
80 | 80 | if ($config['formatHtml.']['tabSize'] && is_numeric($config['formatHtml.']['tabSize'])) { |
@@ -82,11 +82,11 @@ discard block |
||
82 | 82 | } |
83 | 83 | |
84 | 84 | if (isset($config['enable_utf'])) { |
85 | - $this->utf8 = (bool)$config['enable_utf-8_support']; |
|
85 | + $this->utf8 = (bool) $config['enable_utf-8_support']; |
|
86 | 86 | } |
87 | 87 | |
88 | 88 | if (isset($config['formatHtml.']['debugComment'])) { |
89 | - $this->debugComment = (bool)$config['formatHtml.']['debugComment']; |
|
89 | + $this->debugComment = (bool) $config['formatHtml.']['debugComment']; |
|
90 | 90 | } |
91 | 91 | |
92 | 92 | if (isset($config['headerComment'])) { |
@@ -106,7 +106,7 @@ discard block |
||
106 | 106 | public function clean(&$html, $config = []) |
107 | 107 | { |
108 | 108 | if (!empty($config)) { |
109 | - if ((bool)$config['enabled'] === false) { |
|
109 | + if ((bool) $config['enabled'] === false) { |
|
110 | 110 | return; |
111 | 111 | } |
112 | 112 | |
@@ -115,15 +115,15 @@ discard block |
||
115 | 115 | |
116 | 116 | $manipulations = []; |
117 | 117 | |
118 | - if (isset($config['removeGenerator']) && (bool)$config['removeGenerator']) { |
|
118 | + if (isset($config['removeGenerator']) && (bool) $config['removeGenerator']) { |
|
119 | 119 | $manipulations['removeGenerator'] = GeneralUtility::makeInstance('HTML\\Sourceopt\\Manipulation\\RemoveGenerator'); |
120 | 120 | } |
121 | 121 | |
122 | - if (isset($config['removeComments']) && (bool)$config['removeComments']) { |
|
122 | + if (isset($config['removeComments']) && (bool) $config['removeComments']) { |
|
123 | 123 | $manipulations['removeComments'] = GeneralUtility::makeInstance('HTML\\Sourceopt\\Manipulation\\RemoveComments'); |
124 | 124 | } |
125 | 125 | |
126 | - if (isset($config['removeBlurScript']) && (bool)$config['removeBlurScript']) { |
|
126 | + if (isset($config['removeBlurScript']) && (bool) $config['removeBlurScript']) { |
|
127 | 127 | $manipulations['removeBlurScript'] = GeneralUtility::makeInstance('HTML\\Sourceopt\\Manipulation\\RemoveBlurScript'); |
128 | 128 | } |
129 | 129 | |
@@ -133,7 +133,7 @@ discard block |
||
133 | 133 | |
134 | 134 | foreach ($manipulations as $key => $manipulation) { |
135 | 135 | /** @var ManipulationInterface $manipulation */ |
136 | - $configuration = isset($config[$key . '.']) && is_array($config[$key . '.']) ? $config[$key . '.'] : []; |
|
136 | + $configuration = isset($config[$key.'.']) && is_array($config[$key.'.']) ? $config[$key.'.'] : []; |
|
137 | 137 | $html = $manipulation->manipulate($html, $configuration); |
138 | 138 | } |
139 | 139 | |
@@ -177,7 +177,7 @@ discard block |
||
177 | 177 | $functionalBoxElements = 'dd|dt|frameset|li|tbody|td|tfoot|th|thead|tr|colgroup'; |
178 | 178 | $usableBoxElements = 'applet|button|del|iframe|ins|map|object|script'; |
179 | 179 | $imagineBoxElements = 'html|body|head|meta|title|link|script|base|!--'; |
180 | - $allBoxLikeElements = '(?>' . $trueBoxElements . '|' . $functionalBoxElements . '|' . $usableBoxElements . '|' . $imagineBoxElements . ')'; |
|
180 | + $allBoxLikeElements = '(?>'.$trueBoxElements.'|'.$functionalBoxElements.'|'.$usableBoxElements.'|'.$imagineBoxElements.')'; |
|
181 | 181 | $esteticBoxLikeElements = '(?>html|head|body|meta name|title|div|table|h1|h2|h3|h4|h5|h6|p|form|pre|center|!--)'; |
182 | 182 | $structureBoxLikeElements = '(?>html|head|body|div|!--)'; |
183 | 183 | |
@@ -186,7 +186,7 @@ discard block |
||
186 | 186 | '/(<(?:[^<>]+(?:"[^"]*"|\'[^\']*\')?)+>)/', |
187 | 187 | $html, |
188 | 188 | -1, |
189 | - PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY |
|
189 | + PREG_SPLIT_DELIM_CAPTURE|PREG_SPLIT_NO_EMPTY |
|
190 | 190 | ); |
191 | 191 | // remove empty lines |
192 | 192 | $htmlArray = ['']; |
@@ -213,52 +213,52 @@ discard block |
||
213 | 213 | } elseif ($this->formatType == 2 && ( // minimalistic line break |
214 | 214 | # this element has a line break before itself |
215 | 215 | preg_match( |
216 | - '/<' . $structureBoxLikeElements . '(.*)>/Usi', |
|
216 | + '/<'.$structureBoxLikeElements.'(.*)>/Usi', |
|
217 | 217 | $htmlArray[$x] |
218 | 218 | ) || preg_match( |
219 | - '/<' . $structureBoxLikeElements . '(.*) \/>/Usi', |
|
219 | + '/<'.$structureBoxLikeElements.'(.*) \/>/Usi', |
|
220 | 220 | $htmlArray[$x] |
221 | 221 | ) || # one element before is a element that has a line break after |
222 | 222 | preg_match( |
223 | - '/<\/' . $structureBoxLikeElements . '(.*)>/Usi', |
|
223 | + '/<\/'.$structureBoxLikeElements.'(.*)>/Usi', |
|
224 | 224 | $htmlArray[$x - 1] |
225 | 225 | ) || substr( |
226 | 226 | $htmlArray[$x - 1], |
227 | 227 | 0, |
228 | 228 | 4 |
229 | - ) == '<!--' || preg_match('/<' . $structureBoxLikeElements . '(.*) \/>/Usi', $htmlArray[$x - 1])) |
|
229 | + ) == '<!--' || preg_match('/<'.$structureBoxLikeElements.'(.*) \/>/Usi', $htmlArray[$x - 1])) |
|
230 | 230 | ) { |
231 | 231 | $newline = true; |
232 | 232 | } elseif ($this->formatType == 3 && ( // aestetic line break |
233 | 233 | # this element has a line break before itself |
234 | 234 | preg_match( |
235 | - '/<' . $esteticBoxLikeElements . '(.*)>/Usi', |
|
235 | + '/<'.$esteticBoxLikeElements.'(.*)>/Usi', |
|
236 | 236 | $htmlArray[$x] |
237 | 237 | ) || preg_match( |
238 | - '/<' . $esteticBoxLikeElements . '(.*) \/>/Usi', |
|
238 | + '/<'.$esteticBoxLikeElements.'(.*) \/>/Usi', |
|
239 | 239 | $htmlArray[$x] |
240 | 240 | ) || # one element before is a element that has a line break after |
241 | - preg_match('/<\/' . $esteticBoxLikeElements . '(.*)>/Usi', $htmlArray[$x - 1]) || substr( |
|
241 | + preg_match('/<\/'.$esteticBoxLikeElements.'(.*)>/Usi', $htmlArray[$x - 1]) || substr( |
|
242 | 242 | $htmlArray[$x - 1], |
243 | 243 | 0, |
244 | 244 | 4 |
245 | - ) == '<!--' || preg_match('/<' . $esteticBoxLikeElements . '(.*) \/>/Usi', $htmlArray[$x - 1])) |
|
245 | + ) == '<!--' || preg_match('/<'.$esteticBoxLikeElements.'(.*) \/>/Usi', $htmlArray[$x - 1])) |
|
246 | 246 | ) { |
247 | 247 | $newline = true; |
248 | 248 | } elseif ($this->formatType >= 4 && ( // logical line break |
249 | 249 | # this element has a line break before itself |
250 | 250 | preg_match( |
251 | - '/<' . $allBoxLikeElements . '(.*)>/Usi', |
|
251 | + '/<'.$allBoxLikeElements.'(.*)>/Usi', |
|
252 | 252 | $htmlArray[$x] |
253 | 253 | ) || preg_match( |
254 | - '/<' . $allBoxLikeElements . '(.*) \/>/Usi', |
|
254 | + '/<'.$allBoxLikeElements.'(.*) \/>/Usi', |
|
255 | 255 | $htmlArray[$x] |
256 | 256 | ) || # one element before is a element that has a line break after |
257 | - preg_match('/<\/' . $allBoxLikeElements . '(.*)>/Usi', $htmlArray[$x - 1]) || substr( |
|
257 | + preg_match('/<\/'.$allBoxLikeElements.'(.*)>/Usi', $htmlArray[$x - 1]) || substr( |
|
258 | 258 | $htmlArray[$x - 1], |
259 | 259 | 0, |
260 | 260 | 4 |
261 | - ) == '<!--' || preg_match('/<' . $allBoxLikeElements . '(.*) \/>/Usi', $htmlArray[$x - 1])) |
|
261 | + ) == '<!--' || preg_match('/<'.$allBoxLikeElements.'(.*) \/>/Usi', $htmlArray[$x - 1])) |
|
262 | 262 | ) { |
263 | 263 | $newline = true; |
264 | 264 | } |
@@ -335,7 +335,7 @@ discard block |
||
335 | 335 | |
336 | 336 | // include debug comment at the end |
337 | 337 | if ($tabs != 0 && $this->debugComment === true) { |
338 | - $html .= '<!--' . $tabs . " open elements found-->\r\n"; |
|
338 | + $html .= '<!--'.$tabs." open elements found-->\r\n"; |
|
339 | 339 | } |
340 | 340 | } |
341 | 341 | |
@@ -469,7 +469,7 @@ discard block |
||
469 | 469 | 'textarea', |
470 | 470 | 'pre' |
471 | 471 | ]; // eventuell auch: span, script, style |
472 | - $peaces = preg_split('#(<(' . implode('|', $splitArray) . ').*>.*</\2>)#Uis', $html, -1, PREG_SPLIT_DELIM_CAPTURE); |
|
472 | + $peaces = preg_split('#(<('.implode('|', $splitArray).').*>.*</\2>)#Uis', $html, -1, PREG_SPLIT_DELIM_CAPTURE); |
|
473 | 473 | $html = ""; |
474 | 474 | for ($i = 0; $i < count($peaces); $i++) { |
475 | 475 | if (($i + 1) % 3 == 0) { |
@@ -523,8 +523,8 @@ discard block |
||
523 | 523 | public function includeHeaderComment(&$html) |
524 | 524 | { |
525 | 525 | if (!empty($this->headerComment)) { |
526 | - $html = preg_replace_callback('/<meta http-equiv(.*)>/Usi', function ($matches) { |
|
527 | - return trim($matches[0] . $this->newline . $this->tab . $this->tab . '<!-- ' . $this->headerComment . '-->'); |
|
526 | + $html = preg_replace_callback('/<meta http-equiv(.*)>/Usi', function($matches) { |
|
527 | + return trim($matches[0].$this->newline.$this->tab.$this->tab.'<!-- '.$this->headerComment.'-->'); |
|
528 | 528 | }, $html, 1); |
529 | 529 | } |
530 | 530 | } |