@@ -573,13 +573,17 @@ |
||
573 | 573 | /** |
574 | 574 | * Include configured header comment in HTML content block |
575 | 575 | * |
576 | - * @param $html |
|
576 | + * @param string $html |
|
577 | 577 | */ |
578 | 578 | public function includeHeaderComment(&$html) |
579 | 579 | { |
580 | 580 | if (!empty($this->headerComment)) { |
581 | 581 | $html = preg_replace_callback( |
582 | 582 | '/<meta http-equiv(.*)>/Usi', |
583 | + |
|
584 | + /** |
|
585 | + * @param string $matches |
|
586 | + */ |
|
583 | 587 | function ($matches) { |
584 | 588 | return trim($matches[0] . $this->newline . $this->tab . $this->tab . '<!-- ' . $this->headerComment . '-->'); |
585 | 589 | }, |
@@ -4,588 +4,587 @@ |
||
4 | 4 | /** |
5 | 5 | * Service: Clean parsed HTML functionality |
6 | 6 | * Based on the extension 'sourceopt' |
7 | - |
|
8 | 7 | */ |
9 | 8 | class CleanHtmlService implements \TYPO3\CMS\Core\SingletonInterface |
10 | 9 | { |
11 | 10 | |
12 | - /** |
|
13 | - * Enable Debug comment in footer |
|
14 | - * |
|
15 | - * @var boolean |
|
16 | - */ |
|
17 | - protected $debugComment = false; |
|
18 | - |
|
19 | - /** |
|
20 | - * Format Type |
|
21 | - * |
|
22 | - * @var integer |
|
23 | - */ |
|
24 | - protected $formatType = 2; |
|
25 | - |
|
26 | - /** |
|
27 | - * Tab character |
|
28 | - * |
|
29 | - * @var string |
|
30 | - */ |
|
31 | - protected $tab = "\t"; |
|
32 | - |
|
33 | - /** |
|
34 | - * Newline character |
|
35 | - * |
|
36 | - * @var string |
|
37 | - */ |
|
38 | - protected $newline = "\n"; |
|
39 | - |
|
40 | - /** |
|
41 | - * Enable/disable UTF8 support |
|
42 | - * |
|
43 | - * @var boolean |
|
44 | - */ |
|
45 | - protected $utf8 = true; |
|
46 | - |
|
47 | - /** |
|
48 | - * Configured extra header comment |
|
49 | - * |
|
50 | - * @var string |
|
51 | - */ |
|
52 | - protected $headerComment = ''; |
|
53 | - |
|
54 | - /** |
|
55 | - * Enable/disable removal of generator tag |
|
56 | - * |
|
57 | - * @var boolean |
|
58 | - */ |
|
59 | - protected $removeGenerator = true; |
|
60 | - |
|
61 | - /** |
|
62 | - * Enable/disable removal of comments |
|
63 | - * |
|
64 | - * @var boolean |
|
65 | - */ |
|
66 | - protected $removeComments = true; |
|
67 | - |
|
68 | - /** |
|
69 | - * Enable/disable removal of blur scripts |
|
70 | - * |
|
71 | - * @var boolean |
|
72 | - */ |
|
73 | - protected $removeBlurScript = true; |
|
74 | - |
|
75 | - /** |
|
76 | - * Patterns for white-listing comments inside content |
|
77 | - * |
|
78 | - * @var array |
|
79 | - */ |
|
80 | - protected $whiteListCommentsPatterns = array(); |
|
81 | - |
|
82 | - /** |
|
83 | - * Set variables based on given config |
|
84 | - * |
|
85 | - * @param array $config |
|
86 | - * |
|
87 | - * @return void |
|
88 | - */ |
|
89 | - public function setVariables(array $config) |
|
90 | - { |
|
91 | - switch (TYPO3_OS) { // set newline |
|
92 | - case 'WIN': |
|
93 | - $this->newline = "\r\n"; |
|
94 | - break; |
|
95 | - default: |
|
96 | - $this->newline = "\n"; |
|
97 | - } |
|
98 | - |
|
99 | - if (!empty($config)) { |
|
100 | - if ($config['formatHtml'] && is_numeric($config['formatHtml'])) { |
|
101 | - $this->formatType = (int) $config['formatHtml']; |
|
102 | - } |
|
103 | - |
|
104 | - if ($config['formatHtml.']['tabSize'] && is_numeric($config['formatHtml.']['tabSize'])) { |
|
105 | - $this->tab = str_pad('', $config['formatHtml.']['tabSize'], ' '); |
|
106 | - } |
|
107 | - |
|
108 | - if (isset($config['enable_utf'])) { |
|
109 | - $this->utf8 = (bool) $config['enable_utf-8_support']; |
|
110 | - } |
|
111 | - |
|
112 | - if (isset($config['formatHtml.']['debugComment'])) { |
|
113 | - $this->debugComment = (bool) $config['formatHtml.']['debugComment']; |
|
114 | - } |
|
115 | - |
|
116 | - if (isset($config['headerComment'])) { |
|
117 | - $this->headerComment = $config['headerComment']; |
|
118 | - } |
|
119 | - |
|
120 | - if (isset($config['removeGenerator'])) { |
|
121 | - $this->removeGenerator = (bool) $config['removeGenerator']; |
|
122 | - } |
|
123 | - |
|
124 | - if (isset($config['removeComments'])) { |
|
125 | - $this->removeComments = (bool) $config['removeComments']; |
|
126 | - |
|
127 | - if (isset($config['removeComments.'])) { |
|
128 | - $this->whiteListCommentsPatterns = $config['removeComments.']['keep.']; |
|
129 | - } |
|
130 | - } |
|
131 | - |
|
132 | - if (isset($config['removeBlurScript'])) { |
|
133 | - $this->removeBlurScript = (bool) $config['removeBlurScript']; |
|
134 | - } |
|
135 | - } |
|
136 | - } |
|
137 | - |
|
138 | - /** |
|
139 | - * Clean given HTML with formatter |
|
140 | - * |
|
141 | - * @param string $html |
|
142 | - * @param array $config |
|
143 | - * |
|
144 | - * @return void |
|
145 | - */ |
|
146 | - public function clean(&$html, $config = array()) |
|
147 | - { |
|
148 | - if (!empty($config)) { |
|
149 | - if ((bool) $config['enabled'] === false) { |
|
150 | - return; |
|
151 | - } |
|
152 | - |
|
153 | - $this->setVariables($config); |
|
154 | - } |
|
155 | - |
|
156 | - if (true === $this->removeGenerator) { |
|
157 | - $this->removeGenerator($html); |
|
158 | - } |
|
159 | - |
|
160 | - if (true === $this->removeComments) { |
|
161 | - $this->removeComments($html); |
|
162 | - } |
|
163 | - |
|
164 | - if (true === $this->removeBlurScript) { |
|
165 | - $this->removeBlurScript($html); |
|
166 | - } |
|
167 | - |
|
168 | - if (!empty($this->headerComment)) { |
|
169 | - $this->includeHeaderComment($html); |
|
170 | - } |
|
171 | - |
|
172 | - if ($this->formatType) { |
|
173 | - $this->formatHtml($html); |
|
174 | - } |
|
175 | - } |
|
176 | - |
|
177 | - /** |
|
178 | - * Formats the (X)HTML code: |
|
179 | - * - taps according to the hirarchy of the tags |
|
180 | - * - removes empty spaces between tags |
|
181 | - * - removes linebreaks within tags (spares where necessary: pre, textarea, comments, ..) |
|
182 | - * choose from five options: |
|
183 | - * 0 => off |
|
184 | - * 1 => no line break at all (code in one line) |
|
185 | - * 2 => minimalistic line breaks (structure defining box-elements) |
|
186 | - * 3 => aesthetic line breaks (important box-elements) |
|
187 | - * 4 => logic line breaks (all box-elements) |
|
188 | - * 5 => max line breaks (all elements) |
|
189 | - * |
|
190 | - * @param string $html |
|
191 | - * |
|
192 | - * @return void |
|
193 | - */ |
|
194 | - protected function formatHtml(&$html) |
|
195 | - { |
|
196 | - // Save original formated comments, pre, textarea, styles and java-scripts & replace them with markers |
|
197 | - preg_match_all('/(?s)((<!--.*?-->)|(<[ \n\r]*pre[^>]*>.*?<[ \n\r]*\/pre[^>]*>)|(<[ \n\r]*textarea[^>]*>.*?<[ \n\r]*\/textarea[^>]*>)|(<[ \n\r]*style[^>]*>.*?<[ \n\r]*\/style[^>]*>)|(<[ \n\r]*script[^>]*>.*?<[ \n\r]*\/script[^>]*>))/im', $html, $matches); |
|
198 | - $no_format = $matches[0]; // do not format these block elements |
|
199 | - for ($i = 0; $i < count($no_format); $i++) { |
|
200 | - $html = str_replace($no_format[$i], "\n<!-- ELEMENT $i -->", $html); |
|
201 | - } |
|
202 | - |
|
203 | - // define box elements for formatting |
|
204 | - $trueBoxElements = 'address|blockquote|center|dir|div|dl|fieldset|form|h1|h2|h3|h4|h5|h6|hr|isindex|menu|noframes|noscript|ol|p|pre|table|ul|article|aside|details|figcaption|figure|footer|header|hgroup|menu|nav|section'; |
|
205 | - $functionalBoxElements = 'dd|dt|frameset|li|tbody|td|tfoot|th|thead|tr|colgroup'; |
|
206 | - $usableBoxElements = 'applet|button|del|iframe|ins|map|object|script'; |
|
207 | - $imagineBoxElements = 'html|body|head|meta|title|link|script|base|!--'; |
|
208 | - $allBoxLikeElements = '(?>' . $trueBoxElements . '|' . $functionalBoxElements . '|' . $usableBoxElements . '|' . $imagineBoxElements . ')'; |
|
209 | - $esteticBoxLikeElements = '(?>html|head|body|meta name|title|div|table|h1|h2|h3|h4|h5|h6|p|form|pre|center|!--)'; |
|
210 | - $structureBoxLikeElements = '(?>html|head|body|div|!--)'; |
|
211 | - |
|
212 | - // split html into it's elements |
|
213 | - $html_array_temp = preg_split('/(<(?:[^<>]+(?:"[^"]*"|\'[^\']*\')?)+>)/', $html, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY); |
|
214 | - // remove empty lines |
|
215 | - $html_array = array(''); |
|
216 | - $z = 1; |
|
217 | - for ($x = 0; $x < count($html_array_temp); $x++) { |
|
218 | - $t = trim($html_array_temp[$x]); |
|
219 | - if ($t !== '') { |
|
220 | - $html_array[$z] = $html_array_temp[$x]; |
|
221 | - $z++; |
|
222 | - // if the trimmed line was empty but the original wasn't, search for inline element closing tags in the last $html_array element |
|
223 | - } else { |
|
224 | - // if ($t !== $html_array_temp[$x] && preg_match('/<\/' . $inlineElements . '( .*)? >/Usi', $html_array[$z - 1]) === 1) |
|
225 | - $html_array[$z] = ' '; |
|
226 | - $z++; |
|
227 | - } |
|
228 | - } |
|
229 | - |
|
230 | - // rebuild html |
|
231 | - $html = ''; |
|
232 | - $tabs = 0; |
|
233 | - for ($x = 0; $x < count($html_array); $x++) { |
|
234 | - // check if the element should stand in a new line |
|
235 | - $newline = false; |
|
236 | - if (substr($html_array[$x - 1], 0, 5) == '<?xml') { |
|
237 | - $newline = true; |
|
238 | - } elseif ($this->formatType == 2 && ( // minimalistic line break |
|
239 | - # this element has a line break before itself |
|
240 | - preg_match('/<' . $structureBoxLikeElements . '(.*)>/Usi', $html_array[$x]) || preg_match('/<' . $structureBoxLikeElements . '(.*) \/>/Usi', $html_array[$x]) || # one element before is a element that has a line break after |
|
241 | - preg_match('/<\/' . $structureBoxLikeElements . '(.*)>/Usi', $html_array[$x - 1]) || substr($html_array[$x - 1], 0, 4) == '<!--' || preg_match('/<' . $structureBoxLikeElements . '(.*) \/>/Usi', $html_array[$x - 1])) |
|
242 | - ) { |
|
243 | - $newline = true; |
|
244 | - } elseif ($this->formatType == 3 && ( // aestetic line break |
|
245 | - # this element has a line break before itself |
|
246 | - preg_match('/<' . $esteticBoxLikeElements . '(.*)>/Usi', $html_array[$x]) || preg_match('/<' . $esteticBoxLikeElements . '(.*) \/>/Usi', $html_array[$x]) || # one element before is a element that has a line break after |
|
247 | - preg_match('/<\/' . $esteticBoxLikeElements . '(.*)>/Usi', $html_array[$x - 1]) || substr($html_array[$x - 1], 0, 4) == '<!--' || preg_match('/<' . $esteticBoxLikeElements . '(.*) \/>/Usi', $html_array[$x - 1])) |
|
248 | - ) { |
|
249 | - $newline = true; |
|
250 | - } elseif ($this->formatType >= 4 && ( // logical line break |
|
251 | - # this element has a line break before itself |
|
252 | - preg_match('/<' . $allBoxLikeElements . '(.*)>/Usi', $html_array[$x]) || preg_match('/<' . $allBoxLikeElements . '(.*) \/>/Usi', $html_array[$x]) || # one element before is a element that has a line break after |
|
253 | - preg_match('/<\/' . $allBoxLikeElements . '(.*)>/Usi', $html_array[$x - 1]) || substr($html_array[$x - 1], 0, 4) == '<!--' || preg_match('/<' . $allBoxLikeElements . '(.*) \/>/Usi', $html_array[$x - 1])) |
|
254 | - ) { |
|
255 | - $newline = true; |
|
256 | - } |
|
257 | - |
|
258 | - // count down a tab |
|
259 | - if (substr($html_array[$x], 0, 2) == '</') { |
|
260 | - $tabs--; |
|
261 | - } |
|
262 | - |
|
263 | - // add tabs and line breaks in front of the current tag |
|
264 | - if ($newline) { |
|
265 | - $html .= $this->newline; |
|
266 | - for ($y = 0; $y < $tabs; $y++) { |
|
267 | - $html .= $this->tab; |
|
268 | - } |
|
269 | - } |
|
270 | - |
|
271 | - // remove white spaces and line breaks and add current tag to the html-string |
|
272 | - if (substr($html_array[$x - 1], 0, 4) == '<pre' // remove white space after line ending in PRE / TEXTAREA / comment |
|
273 | - || substr($html_array[$x - 1], 0, 9) == '<textarea' || substr($html_array[$x - 1], 0, 4) == '<!--' |
|
274 | - ) { |
|
275 | - $html .= $this->rTrimLines($html_array[$x]); |
|
276 | - } elseif (substr($html_array[$x], 0, 9) == '<![CDATA[' // remove multiple white space in CDATA / XML |
|
277 | - || substr($html_array[$x], 0, 5) == '<?xml' |
|
278 | - ) { |
|
279 | - $html .= $this->killWhiteSpace($html_array[$x]); |
|
280 | - } else { // remove all line breaks |
|
281 | - $html .= $this->killLineBreaks($html_array[$x]); |
|
282 | - } |
|
283 | - |
|
284 | - // count up a tab |
|
285 | - if (substr($html_array[$x], 0, 1) == '<' && substr($html_array[$x], 1, 1) != '/') { |
|
286 | - if (substr($html_array[$x], 1, 1) != ' ' && substr($html_array[$x], 1, 3) != 'img' && substr($html_array[$x], 1, 2) != 'br' && substr($html_array[$x], 1, 2) != 'hr' && substr($html_array[$x], 1, 5) != 'input' && substr($html_array[$x], 1, 4) != 'link' && substr($html_array[$x], 1, 4) != 'meta' && substr($html_array[$x], 1, 4) != 'col ' && substr($html_array[$x], 1, 5) != 'frame' && substr($html_array[$x], 1, 7) != 'isindex' && substr($html_array[$x], 1, 5) != 'param' && substr($html_array[$x], 1, 4) != 'area' && substr($html_array[$x], 1, 4) != 'base' && substr($html_array[$x], 0, 2) != '<!' && substr($html_array[$x], 0, 5) != '<?xml' |
|
287 | - ) { |
|
288 | - $tabs++; |
|
289 | - } |
|
290 | - } |
|
291 | - } |
|
292 | - |
|
293 | - // Remove empty lines |
|
294 | - if ($this->formatType > 1) { |
|
295 | - $this->removeEmptyLines($html); |
|
296 | - } |
|
297 | - |
|
298 | - // Restore saved comments, styles and java-scripts |
|
299 | - for ($i = 0; $i < count($no_format); $i++) { |
|
300 | - $no_format[$i] = $this->rTrimLines($no_format[$i]); // remove white space after line ending |
|
301 | - $html = str_replace("<!-- ELEMENT $i -->", $no_format[$i], $html); |
|
302 | - } |
|
303 | - |
|
304 | - // include debug comment at the end |
|
305 | - if ($tabs != 0 && $this->debugComment === true) { |
|
306 | - $html .= '<!--' . $tabs . " open elements found-->\r\n"; |
|
307 | - } |
|
308 | - } |
|
309 | - |
|
310 | - /** |
|
311 | - * Remove ALL line breaks and multiple white space |
|
312 | - * |
|
313 | - * @param string $html |
|
314 | - * |
|
315 | - * @return string |
|
316 | - */ |
|
317 | - protected function killLineBreaks($html) |
|
318 | - { |
|
319 | - $html = $this->convNlOs($html); |
|
320 | - $html = str_replace($this->newline, "", $html); |
|
321 | - // remove double empty spaces |
|
322 | - if ($this->utf8 == true) { |
|
323 | - $html = preg_replace('/\s\s+/u', ' ', $html); |
|
324 | - } else { |
|
325 | - $html = preg_replace('/\s\s+/', ' ', $html); |
|
326 | - } |
|
327 | - return $html; |
|
328 | - } |
|
329 | - |
|
330 | - /** |
|
331 | - * Remove multiple white space, keeps line breaks |
|
332 | - * |
|
333 | - * @param string $html |
|
334 | - * |
|
335 | - * @return string |
|
336 | - */ |
|
337 | - protected function killWhiteSpace($html) |
|
338 | - { |
|
339 | - $html = $this->convNlOs($html); |
|
340 | - $temp = explode($this->newline, $html); |
|
341 | - for ($i = 0; $i < count($temp); $i++) { |
|
342 | - if (!trim($temp[$i])) { |
|
343 | - unset($temp[$i]); |
|
344 | - } else { |
|
345 | - $temp[$i] = trim($temp[$i]); |
|
346 | - $temp[$i] = preg_replace('/\s\s+/', ' ', $temp[$i]); |
|
347 | - } |
|
348 | - } |
|
349 | - $html = implode($this->newline, $temp); |
|
350 | - return $html; |
|
351 | - } |
|
352 | - |
|
353 | - /** |
|
354 | - * Remove white space at the end of lines, keeps other white space and line breaks |
|
355 | - * |
|
356 | - * @param string $html |
|
357 | - * |
|
358 | - * @return string |
|
359 | - */ |
|
360 | - protected function rTrimLines($html) |
|
361 | - { |
|
362 | - $html = $this->convNlOs($html); |
|
363 | - $temp = explode($this->newline, $html); |
|
364 | - for ($i = 0; $i < count($temp); $i++) { |
|
365 | - $temp[$i] = rtrim($temp[$i]); |
|
366 | - } |
|
367 | - $html = implode($this->newline, $temp); |
|
368 | - return $html; |
|
369 | - } |
|
370 | - |
|
371 | - /** |
|
372 | - * Convert newlines according to the current OS |
|
373 | - * |
|
374 | - * @param string $html |
|
375 | - * |
|
376 | - * @return string |
|
377 | - */ |
|
378 | - protected function convNlOs($html) |
|
379 | - { |
|
380 | - $html = preg_replace("(\r\n|\n|\r)", $this->newline, $html); |
|
381 | - return $html; |
|
382 | - } |
|
383 | - |
|
384 | - /** |
|
385 | - * Remove tabs and empty spaces before and after lines, transforms linebreaks system conform |
|
386 | - * |
|
387 | - * @param string $html Html-Code |
|
388 | - * |
|
389 | - * @return void |
|
390 | - */ |
|
391 | - protected function trimLines(&$html) |
|
392 | - { |
|
393 | - $html = str_replace("\t", "", $html); |
|
394 | - // convert newlines according to the current OS |
|
395 | - if (TYPO3_OS == "WIN") { |
|
396 | - $html = str_replace("\n", "\r\n", $html); |
|
397 | - } else { |
|
398 | - $html = str_replace("\r\n", "\n", $html); |
|
399 | - } |
|
400 | - $temp = explode($this->newline, $html); |
|
401 | - $temp = array_map('trim', $temp); |
|
402 | - $html = implode($this->newline, $temp); |
|
403 | - unset($temp); |
|
404 | - } |
|
405 | - |
|
406 | - /** |
|
407 | - * Remove empty lines |
|
408 | - * |
|
409 | - * @param string $html |
|
410 | - * |
|
411 | - * @return void |
|
412 | - */ |
|
413 | - protected function removeEmptyLines(&$html) |
|
414 | - { |
|
415 | - $temp = explode($this->newline, $html); |
|
416 | - $result = array(); |
|
417 | - for ($i = 0; $i < count($temp); ++$i) { |
|
418 | - if ("" == trim($temp[$i])) { |
|
419 | - continue; |
|
420 | - } |
|
421 | - $result[] = $temp[$i]; |
|
422 | - } |
|
423 | - $html = implode($this->newline, $result); |
|
424 | - } |
|
425 | - |
|
426 | - /** |
|
427 | - * Remove new lines where unnecessary |
|
428 | - * spares line breaks within: pre, textarea, ... |
|
429 | - * |
|
430 | - * @param string $html |
|
431 | - * |
|
432 | - * @return void |
|
433 | - */ |
|
434 | - protected function removeNewLines(&$html) |
|
435 | - { |
|
436 | - $splitArray = array( |
|
437 | - 'textarea', |
|
438 | - 'pre' |
|
439 | - ); // eventuell auch: span, script, style |
|
440 | - $peaces = preg_split('#(<(' . implode('|', $splitArray) . ').*>.*</\2>)#Uis', $html, -1, PREG_SPLIT_DELIM_CAPTURE); |
|
441 | - $html = ""; |
|
442 | - for ($i = 0; $i < count($peaces); $i++) { |
|
443 | - if (($i + 1) % 3 == 0) { |
|
444 | - continue; |
|
445 | - } |
|
446 | - $html .= (($i - 1) % 3 != 0) ? $this->killLineBreaks($peaces[$i]) : $peaces[$i]; |
|
447 | - } |
|
448 | - } |
|
449 | - |
|
450 | - /** |
|
451 | - * Remove obsolete link schema |
|
452 | - * |
|
453 | - * @param string $html |
|
454 | - * |
|
455 | - * @return void |
|
456 | - */ |
|
457 | - protected function removeLinkSchema(&$html) |
|
458 | - { |
|
459 | - $html = preg_replace("/<link rel=\"?schema.dc\"?.+?>/is", "", $html); |
|
460 | - } |
|
461 | - |
|
462 | - /** |
|
463 | - * Remove empty alt tags |
|
464 | - * |
|
465 | - * @param string $html |
|
466 | - * |
|
467 | - * @return void |
|
468 | - */ |
|
469 | - protected function removeEmptyAltAtr(&$html) |
|
470 | - { |
|
471 | - $html = str_replace("alt=\"\"", "", $html); |
|
472 | - } |
|
473 | - |
|
474 | - /** |
|
475 | - * Remove broken links in <a> tags |
|
476 | - * |
|
477 | - * @param string $html |
|
478 | - * |
|
479 | - * @return void |
|
480 | - */ |
|
481 | - protected function removeRealUrlBrokenRootLink(&$html) |
|
482 | - { |
|
483 | - $html = str_replace('href=".html"', 'href=""', $html); |
|
484 | - } |
|
485 | - |
|
486 | - /** |
|
487 | - * Remove all comments except the whitelisted comments |
|
488 | - * |
|
489 | - * @param string $html |
|
490 | - * |
|
491 | - * @return void |
|
492 | - */ |
|
493 | - protected function removeComments(&$html) |
|
494 | - { |
|
495 | - // match all styles, scripts and comments |
|
496 | - $matches = array(); |
|
497 | - preg_match_all('/(?s)((<!--.*?-->)|(<[ \n\r]*style[^>]*>.*?<[ \n\r]*\/style[^>]*>)|(<[ \n\r]*script[^>]*>.*?<[ \n\r]*\/script[^>]*>))/im', $html, $matches); |
|
498 | - foreach ($matches[0] as $tag) { |
|
499 | - if ($this->keepComment($tag) === false) { |
|
500 | - $html = str_replace($tag, '', $html); |
|
501 | - } |
|
502 | - } |
|
503 | - } |
|
504 | - |
|
505 | - /** |
|
506 | - * Check if a comment is defined to be kept in a pattern whiteListOfComments |
|
507 | - * |
|
508 | - * @param string $commentHtml |
|
509 | - * |
|
510 | - * @return boolean |
|
511 | - */ |
|
512 | - protected function keepComment($commentHtml) |
|
513 | - { |
|
514 | - // if not even a comment, skip this |
|
515 | - if (!preg_match('/^\<\!\-\-(.*?)\-\-\>$/usi', $commentHtml)) { |
|
516 | - return true; |
|
517 | - } |
|
518 | - |
|
519 | - // if not defined in white list |
|
520 | - if (!empty($this->whiteListCommentsPatterns)) { |
|
521 | - $commentHtml = str_replace("<!--", "", $commentHtml); |
|
522 | - $commentHtml = str_replace("-->", "", $commentHtml); |
|
523 | - $commentHtml = trim($commentHtml); |
|
524 | - foreach ($this->whiteListCommentsPatterns as $pattern) { |
|
525 | - if (preg_match($pattern, $commentHtml)) { |
|
526 | - return true; |
|
527 | - } |
|
528 | - } |
|
529 | - } |
|
530 | - return false; |
|
531 | - } |
|
532 | - |
|
533 | - /** |
|
534 | - * TYPO3 adds to each page a small script: |
|
535 | - * <script language="javascript"> |
|
536 | - * <!-- |
|
537 | - * browserName = navigator.appName; |
|
538 | - * browserVer = parseInt(navigator.appVersion); |
|
539 | - * var msie4 = (browserName == "Microsoft Internet Explorer" && browserVer >= 4); |
|
540 | - * if ((browserName == "Netscape" && browserVer >= 3) || msie4 || browserName=="Konqueror") {version = "n3";} else {version = "n2";} |
|
541 | - * function blurLink(theObject){ |
|
542 | - * if (msie4){theObject.blur();} |
|
543 | - * } |
|
544 | - * // --> |
|
545 | - * </script> |
|
546 | - * Obviously used for client-side browserdetection - but thats not necessary if your page doesn't use JS |
|
547 | - * |
|
548 | - * @param string $html |
|
549 | - * |
|
550 | - * @return void |
|
551 | - */ |
|
552 | - public function removeBlurScript(&$html) |
|
553 | - { |
|
554 | - if (strlen($html) < 100000) { |
|
555 | - $pattern = '/<script (type="text\/javascript"|language="javascript")>.+?Konqueror.+function blurLink.+theObject.blur.+?<\/script>/is'; |
|
556 | - $html = preg_replace($pattern, '', $html); // in head |
|
557 | - } |
|
558 | - $html = str_replace(' onfocus="blurLink(this);"', '', $html); // in body |
|
559 | - } |
|
560 | - |
|
561 | - /** |
|
562 | - * Remove the generator Tag |
|
563 | - * |
|
564 | - * @param string $html |
|
565 | - * |
|
566 | - * @return void |
|
567 | - */ |
|
568 | - public function removeGenerator(&$html) |
|
569 | - { |
|
570 | - $html = preg_replace('/<meta name=\"?generator\"?.+?>/is', '', $html); |
|
571 | - } |
|
572 | - |
|
573 | - /** |
|
574 | - * Include configured header comment in HTML content block |
|
575 | - * |
|
576 | - * @param $html |
|
577 | - */ |
|
578 | - public function includeHeaderComment(&$html) |
|
579 | - { |
|
580 | - if (!empty($this->headerComment)) { |
|
581 | - $html = preg_replace_callback( |
|
582 | - '/<meta http-equiv(.*)>/Usi', |
|
583 | - function ($matches) { |
|
584 | - return trim($matches[0] . $this->newline . $this->tab . $this->tab . '<!-- ' . $this->headerComment . '-->'); |
|
585 | - }, |
|
586 | - $html, |
|
587 | - 1 |
|
588 | - ); |
|
589 | - } |
|
590 | - } |
|
11 | + /** |
|
12 | + * Enable Debug comment in footer |
|
13 | + * |
|
14 | + * @var boolean |
|
15 | + */ |
|
16 | + protected $debugComment = false; |
|
17 | + |
|
18 | + /** |
|
19 | + * Format Type |
|
20 | + * |
|
21 | + * @var integer |
|
22 | + */ |
|
23 | + protected $formatType = 2; |
|
24 | + |
|
25 | + /** |
|
26 | + * Tab character |
|
27 | + * |
|
28 | + * @var string |
|
29 | + */ |
|
30 | + protected $tab = "\t"; |
|
31 | + |
|
32 | + /** |
|
33 | + * Newline character |
|
34 | + * |
|
35 | + * @var string |
|
36 | + */ |
|
37 | + protected $newline = "\n"; |
|
38 | + |
|
39 | + /** |
|
40 | + * Enable/disable UTF8 support |
|
41 | + * |
|
42 | + * @var boolean |
|
43 | + */ |
|
44 | + protected $utf8 = true; |
|
45 | + |
|
46 | + /** |
|
47 | + * Configured extra header comment |
|
48 | + * |
|
49 | + * @var string |
|
50 | + */ |
|
51 | + protected $headerComment = ''; |
|
52 | + |
|
53 | + /** |
|
54 | + * Enable/disable removal of generator tag |
|
55 | + * |
|
56 | + * @var boolean |
|
57 | + */ |
|
58 | + protected $removeGenerator = true; |
|
59 | + |
|
60 | + /** |
|
61 | + * Enable/disable removal of comments |
|
62 | + * |
|
63 | + * @var boolean |
|
64 | + */ |
|
65 | + protected $removeComments = true; |
|
66 | + |
|
67 | + /** |
|
68 | + * Enable/disable removal of blur scripts |
|
69 | + * |
|
70 | + * @var boolean |
|
71 | + */ |
|
72 | + protected $removeBlurScript = true; |
|
73 | + |
|
74 | + /** |
|
75 | + * Patterns for white-listing comments inside content |
|
76 | + * |
|
77 | + * @var array |
|
78 | + */ |
|
79 | + protected $whiteListCommentsPatterns = array(); |
|
80 | + |
|
81 | + /** |
|
82 | + * Set variables based on given config |
|
83 | + * |
|
84 | + * @param array $config |
|
85 | + * |
|
86 | + * @return void |
|
87 | + */ |
|
88 | + public function setVariables(array $config) |
|
89 | + { |
|
90 | + switch (TYPO3_OS) { // set newline |
|
91 | + case 'WIN': |
|
92 | + $this->newline = "\r\n"; |
|
93 | + break; |
|
94 | + default: |
|
95 | + $this->newline = "\n"; |
|
96 | + } |
|
97 | + |
|
98 | + if (!empty($config)) { |
|
99 | + if ($config['formatHtml'] && is_numeric($config['formatHtml'])) { |
|
100 | + $this->formatType = (int) $config['formatHtml']; |
|
101 | + } |
|
102 | + |
|
103 | + if ($config['formatHtml.']['tabSize'] && is_numeric($config['formatHtml.']['tabSize'])) { |
|
104 | + $this->tab = str_pad('', $config['formatHtml.']['tabSize'], ' '); |
|
105 | + } |
|
106 | + |
|
107 | + if (isset($config['enable_utf'])) { |
|
108 | + $this->utf8 = (bool) $config['enable_utf-8_support']; |
|
109 | + } |
|
110 | + |
|
111 | + if (isset($config['formatHtml.']['debugComment'])) { |
|
112 | + $this->debugComment = (bool) $config['formatHtml.']['debugComment']; |
|
113 | + } |
|
114 | + |
|
115 | + if (isset($config['headerComment'])) { |
|
116 | + $this->headerComment = $config['headerComment']; |
|
117 | + } |
|
118 | + |
|
119 | + if (isset($config['removeGenerator'])) { |
|
120 | + $this->removeGenerator = (bool) $config['removeGenerator']; |
|
121 | + } |
|
122 | + |
|
123 | + if (isset($config['removeComments'])) { |
|
124 | + $this->removeComments = (bool) $config['removeComments']; |
|
125 | + |
|
126 | + if (isset($config['removeComments.'])) { |
|
127 | + $this->whiteListCommentsPatterns = $config['removeComments.']['keep.']; |
|
128 | + } |
|
129 | + } |
|
130 | + |
|
131 | + if (isset($config['removeBlurScript'])) { |
|
132 | + $this->removeBlurScript = (bool) $config['removeBlurScript']; |
|
133 | + } |
|
134 | + } |
|
135 | + } |
|
136 | + |
|
137 | + /** |
|
138 | + * Clean given HTML with formatter |
|
139 | + * |
|
140 | + * @param string $html |
|
141 | + * @param array $config |
|
142 | + * |
|
143 | + * @return void |
|
144 | + */ |
|
145 | + public function clean(&$html, $config = array()) |
|
146 | + { |
|
147 | + if (!empty($config)) { |
|
148 | + if ((bool) $config['enabled'] === false) { |
|
149 | + return; |
|
150 | + } |
|
151 | + |
|
152 | + $this->setVariables($config); |
|
153 | + } |
|
154 | + |
|
155 | + if (true === $this->removeGenerator) { |
|
156 | + $this->removeGenerator($html); |
|
157 | + } |
|
158 | + |
|
159 | + if (true === $this->removeComments) { |
|
160 | + $this->removeComments($html); |
|
161 | + } |
|
162 | + |
|
163 | + if (true === $this->removeBlurScript) { |
|
164 | + $this->removeBlurScript($html); |
|
165 | + } |
|
166 | + |
|
167 | + if (!empty($this->headerComment)) { |
|
168 | + $this->includeHeaderComment($html); |
|
169 | + } |
|
170 | + |
|
171 | + if ($this->formatType) { |
|
172 | + $this->formatHtml($html); |
|
173 | + } |
|
174 | + } |
|
175 | + |
|
176 | + /** |
|
177 | + * Formats the (X)HTML code: |
|
178 | + * - taps according to the hirarchy of the tags |
|
179 | + * - removes empty spaces between tags |
|
180 | + * - removes linebreaks within tags (spares where necessary: pre, textarea, comments, ..) |
|
181 | + * choose from five options: |
|
182 | + * 0 => off |
|
183 | + * 1 => no line break at all (code in one line) |
|
184 | + * 2 => minimalistic line breaks (structure defining box-elements) |
|
185 | + * 3 => aesthetic line breaks (important box-elements) |
|
186 | + * 4 => logic line breaks (all box-elements) |
|
187 | + * 5 => max line breaks (all elements) |
|
188 | + * |
|
189 | + * @param string $html |
|
190 | + * |
|
191 | + * @return void |
|
192 | + */ |
|
193 | + protected function formatHtml(&$html) |
|
194 | + { |
|
195 | + // Save original formated comments, pre, textarea, styles and java-scripts & replace them with markers |
|
196 | + preg_match_all('/(?s)((<!--.*?-->)|(<[ \n\r]*pre[^>]*>.*?<[ \n\r]*\/pre[^>]*>)|(<[ \n\r]*textarea[^>]*>.*?<[ \n\r]*\/textarea[^>]*>)|(<[ \n\r]*style[^>]*>.*?<[ \n\r]*\/style[^>]*>)|(<[ \n\r]*script[^>]*>.*?<[ \n\r]*\/script[^>]*>))/im', $html, $matches); |
|
197 | + $no_format = $matches[0]; // do not format these block elements |
|
198 | + for ($i = 0; $i < count($no_format); $i++) { |
|
199 | + $html = str_replace($no_format[$i], "\n<!-- ELEMENT $i -->", $html); |
|
200 | + } |
|
201 | + |
|
202 | + // define box elements for formatting |
|
203 | + $trueBoxElements = 'address|blockquote|center|dir|div|dl|fieldset|form|h1|h2|h3|h4|h5|h6|hr|isindex|menu|noframes|noscript|ol|p|pre|table|ul|article|aside|details|figcaption|figure|footer|header|hgroup|menu|nav|section'; |
|
204 | + $functionalBoxElements = 'dd|dt|frameset|li|tbody|td|tfoot|th|thead|tr|colgroup'; |
|
205 | + $usableBoxElements = 'applet|button|del|iframe|ins|map|object|script'; |
|
206 | + $imagineBoxElements = 'html|body|head|meta|title|link|script|base|!--'; |
|
207 | + $allBoxLikeElements = '(?>' . $trueBoxElements . '|' . $functionalBoxElements . '|' . $usableBoxElements . '|' . $imagineBoxElements . ')'; |
|
208 | + $esteticBoxLikeElements = '(?>html|head|body|meta name|title|div|table|h1|h2|h3|h4|h5|h6|p|form|pre|center|!--)'; |
|
209 | + $structureBoxLikeElements = '(?>html|head|body|div|!--)'; |
|
210 | + |
|
211 | + // split html into it's elements |
|
212 | + $html_array_temp = preg_split('/(<(?:[^<>]+(?:"[^"]*"|\'[^\']*\')?)+>)/', $html, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY); |
|
213 | + // remove empty lines |
|
214 | + $html_array = array(''); |
|
215 | + $z = 1; |
|
216 | + for ($x = 0; $x < count($html_array_temp); $x++) { |
|
217 | + $t = trim($html_array_temp[$x]); |
|
218 | + if ($t !== '') { |
|
219 | + $html_array[$z] = $html_array_temp[$x]; |
|
220 | + $z++; |
|
221 | + // if the trimmed line was empty but the original wasn't, search for inline element closing tags in the last $html_array element |
|
222 | + } else { |
|
223 | + // if ($t !== $html_array_temp[$x] && preg_match('/<\/' . $inlineElements . '( .*)? >/Usi', $html_array[$z - 1]) === 1) |
|
224 | + $html_array[$z] = ' '; |
|
225 | + $z++; |
|
226 | + } |
|
227 | + } |
|
228 | + |
|
229 | + // rebuild html |
|
230 | + $html = ''; |
|
231 | + $tabs = 0; |
|
232 | + for ($x = 0; $x < count($html_array); $x++) { |
|
233 | + // check if the element should stand in a new line |
|
234 | + $newline = false; |
|
235 | + if (substr($html_array[$x - 1], 0, 5) == '<?xml') { |
|
236 | + $newline = true; |
|
237 | + } elseif ($this->formatType == 2 && ( // minimalistic line break |
|
238 | + # this element has a line break before itself |
|
239 | + preg_match('/<' . $structureBoxLikeElements . '(.*)>/Usi', $html_array[$x]) || preg_match('/<' . $structureBoxLikeElements . '(.*) \/>/Usi', $html_array[$x]) || # one element before is a element that has a line break after |
|
240 | + preg_match('/<\/' . $structureBoxLikeElements . '(.*)>/Usi', $html_array[$x - 1]) || substr($html_array[$x - 1], 0, 4) == '<!--' || preg_match('/<' . $structureBoxLikeElements . '(.*) \/>/Usi', $html_array[$x - 1])) |
|
241 | + ) { |
|
242 | + $newline = true; |
|
243 | + } elseif ($this->formatType == 3 && ( // aestetic line break |
|
244 | + # this element has a line break before itself |
|
245 | + preg_match('/<' . $esteticBoxLikeElements . '(.*)>/Usi', $html_array[$x]) || preg_match('/<' . $esteticBoxLikeElements . '(.*) \/>/Usi', $html_array[$x]) || # one element before is a element that has a line break after |
|
246 | + preg_match('/<\/' . $esteticBoxLikeElements . '(.*)>/Usi', $html_array[$x - 1]) || substr($html_array[$x - 1], 0, 4) == '<!--' || preg_match('/<' . $esteticBoxLikeElements . '(.*) \/>/Usi', $html_array[$x - 1])) |
|
247 | + ) { |
|
248 | + $newline = true; |
|
249 | + } elseif ($this->formatType >= 4 && ( // logical line break |
|
250 | + # this element has a line break before itself |
|
251 | + preg_match('/<' . $allBoxLikeElements . '(.*)>/Usi', $html_array[$x]) || preg_match('/<' . $allBoxLikeElements . '(.*) \/>/Usi', $html_array[$x]) || # one element before is a element that has a line break after |
|
252 | + preg_match('/<\/' . $allBoxLikeElements . '(.*)>/Usi', $html_array[$x - 1]) || substr($html_array[$x - 1], 0, 4) == '<!--' || preg_match('/<' . $allBoxLikeElements . '(.*) \/>/Usi', $html_array[$x - 1])) |
|
253 | + ) { |
|
254 | + $newline = true; |
|
255 | + } |
|
256 | + |
|
257 | + // count down a tab |
|
258 | + if (substr($html_array[$x], 0, 2) == '</') { |
|
259 | + $tabs--; |
|
260 | + } |
|
261 | + |
|
262 | + // add tabs and line breaks in front of the current tag |
|
263 | + if ($newline) { |
|
264 | + $html .= $this->newline; |
|
265 | + for ($y = 0; $y < $tabs; $y++) { |
|
266 | + $html .= $this->tab; |
|
267 | + } |
|
268 | + } |
|
269 | + |
|
270 | + // remove white spaces and line breaks and add current tag to the html-string |
|
271 | + if (substr($html_array[$x - 1], 0, 4) == '<pre' // remove white space after line ending in PRE / TEXTAREA / comment |
|
272 | + || substr($html_array[$x - 1], 0, 9) == '<textarea' || substr($html_array[$x - 1], 0, 4) == '<!--' |
|
273 | + ) { |
|
274 | + $html .= $this->rTrimLines($html_array[$x]); |
|
275 | + } elseif (substr($html_array[$x], 0, 9) == '<![CDATA[' // remove multiple white space in CDATA / XML |
|
276 | + || substr($html_array[$x], 0, 5) == '<?xml' |
|
277 | + ) { |
|
278 | + $html .= $this->killWhiteSpace($html_array[$x]); |
|
279 | + } else { // remove all line breaks |
|
280 | + $html .= $this->killLineBreaks($html_array[$x]); |
|
281 | + } |
|
282 | + |
|
283 | + // count up a tab |
|
284 | + if (substr($html_array[$x], 0, 1) == '<' && substr($html_array[$x], 1, 1) != '/') { |
|
285 | + if (substr($html_array[$x], 1, 1) != ' ' && substr($html_array[$x], 1, 3) != 'img' && substr($html_array[$x], 1, 2) != 'br' && substr($html_array[$x], 1, 2) != 'hr' && substr($html_array[$x], 1, 5) != 'input' && substr($html_array[$x], 1, 4) != 'link' && substr($html_array[$x], 1, 4) != 'meta' && substr($html_array[$x], 1, 4) != 'col ' && substr($html_array[$x], 1, 5) != 'frame' && substr($html_array[$x], 1, 7) != 'isindex' && substr($html_array[$x], 1, 5) != 'param' && substr($html_array[$x], 1, 4) != 'area' && substr($html_array[$x], 1, 4) != 'base' && substr($html_array[$x], 0, 2) != '<!' && substr($html_array[$x], 0, 5) != '<?xml' |
|
286 | + ) { |
|
287 | + $tabs++; |
|
288 | + } |
|
289 | + } |
|
290 | + } |
|
291 | + |
|
292 | + // Remove empty lines |
|
293 | + if ($this->formatType > 1) { |
|
294 | + $this->removeEmptyLines($html); |
|
295 | + } |
|
296 | + |
|
297 | + // Restore saved comments, styles and java-scripts |
|
298 | + for ($i = 0; $i < count($no_format); $i++) { |
|
299 | + $no_format[$i] = $this->rTrimLines($no_format[$i]); // remove white space after line ending |
|
300 | + $html = str_replace("<!-- ELEMENT $i -->", $no_format[$i], $html); |
|
301 | + } |
|
302 | + |
|
303 | + // include debug comment at the end |
|
304 | + if ($tabs != 0 && $this->debugComment === true) { |
|
305 | + $html .= '<!--' . $tabs . " open elements found-->\r\n"; |
|
306 | + } |
|
307 | + } |
|
308 | + |
|
309 | + /** |
|
310 | + * Remove ALL line breaks and multiple white space |
|
311 | + * |
|
312 | + * @param string $html |
|
313 | + * |
|
314 | + * @return string |
|
315 | + */ |
|
316 | + protected function killLineBreaks($html) |
|
317 | + { |
|
318 | + $html = $this->convNlOs($html); |
|
319 | + $html = str_replace($this->newline, "", $html); |
|
320 | + // remove double empty spaces |
|
321 | + if ($this->utf8 == true) { |
|
322 | + $html = preg_replace('/\s\s+/u', ' ', $html); |
|
323 | + } else { |
|
324 | + $html = preg_replace('/\s\s+/', ' ', $html); |
|
325 | + } |
|
326 | + return $html; |
|
327 | + } |
|
328 | + |
|
329 | + /** |
|
330 | + * Remove multiple white space, keeps line breaks |
|
331 | + * |
|
332 | + * @param string $html |
|
333 | + * |
|
334 | + * @return string |
|
335 | + */ |
|
336 | + protected function killWhiteSpace($html) |
|
337 | + { |
|
338 | + $html = $this->convNlOs($html); |
|
339 | + $temp = explode($this->newline, $html); |
|
340 | + for ($i = 0; $i < count($temp); $i++) { |
|
341 | + if (!trim($temp[$i])) { |
|
342 | + unset($temp[$i]); |
|
343 | + } else { |
|
344 | + $temp[$i] = trim($temp[$i]); |
|
345 | + $temp[$i] = preg_replace('/\s\s+/', ' ', $temp[$i]); |
|
346 | + } |
|
347 | + } |
|
348 | + $html = implode($this->newline, $temp); |
|
349 | + return $html; |
|
350 | + } |
|
351 | + |
|
352 | + /** |
|
353 | + * Remove white space at the end of lines, keeps other white space and line breaks |
|
354 | + * |
|
355 | + * @param string $html |
|
356 | + * |
|
357 | + * @return string |
|
358 | + */ |
|
359 | + protected function rTrimLines($html) |
|
360 | + { |
|
361 | + $html = $this->convNlOs($html); |
|
362 | + $temp = explode($this->newline, $html); |
|
363 | + for ($i = 0; $i < count($temp); $i++) { |
|
364 | + $temp[$i] = rtrim($temp[$i]); |
|
365 | + } |
|
366 | + $html = implode($this->newline, $temp); |
|
367 | + return $html; |
|
368 | + } |
|
369 | + |
|
370 | + /** |
|
371 | + * Convert newlines according to the current OS |
|
372 | + * |
|
373 | + * @param string $html |
|
374 | + * |
|
375 | + * @return string |
|
376 | + */ |
|
377 | + protected function convNlOs($html) |
|
378 | + { |
|
379 | + $html = preg_replace("(\r\n|\n|\r)", $this->newline, $html); |
|
380 | + return $html; |
|
381 | + } |
|
382 | + |
|
383 | + /** |
|
384 | + * Remove tabs and empty spaces before and after lines, transforms linebreaks system conform |
|
385 | + * |
|
386 | + * @param string $html Html-Code |
|
387 | + * |
|
388 | + * @return void |
|
389 | + */ |
|
390 | + protected function trimLines(&$html) |
|
391 | + { |
|
392 | + $html = str_replace("\t", "", $html); |
|
393 | + // convert newlines according to the current OS |
|
394 | + if (TYPO3_OS == "WIN") { |
|
395 | + $html = str_replace("\n", "\r\n", $html); |
|
396 | + } else { |
|
397 | + $html = str_replace("\r\n", "\n", $html); |
|
398 | + } |
|
399 | + $temp = explode($this->newline, $html); |
|
400 | + $temp = array_map('trim', $temp); |
|
401 | + $html = implode($this->newline, $temp); |
|
402 | + unset($temp); |
|
403 | + } |
|
404 | + |
|
405 | + /** |
|
406 | + * Remove empty lines |
|
407 | + * |
|
408 | + * @param string $html |
|
409 | + * |
|
410 | + * @return void |
|
411 | + */ |
|
412 | + protected function removeEmptyLines(&$html) |
|
413 | + { |
|
414 | + $temp = explode($this->newline, $html); |
|
415 | + $result = array(); |
|
416 | + for ($i = 0; $i < count($temp); ++$i) { |
|
417 | + if ("" == trim($temp[$i])) { |
|
418 | + continue; |
|
419 | + } |
|
420 | + $result[] = $temp[$i]; |
|
421 | + } |
|
422 | + $html = implode($this->newline, $result); |
|
423 | + } |
|
424 | + |
|
425 | + /** |
|
426 | + * Remove new lines where unnecessary |
|
427 | + * spares line breaks within: pre, textarea, ... |
|
428 | + * |
|
429 | + * @param string $html |
|
430 | + * |
|
431 | + * @return void |
|
432 | + */ |
|
433 | + protected function removeNewLines(&$html) |
|
434 | + { |
|
435 | + $splitArray = array( |
|
436 | + 'textarea', |
|
437 | + 'pre' |
|
438 | + ); // eventuell auch: span, script, style |
|
439 | + $peaces = preg_split('#(<(' . implode('|', $splitArray) . ').*>.*</\2>)#Uis', $html, -1, PREG_SPLIT_DELIM_CAPTURE); |
|
440 | + $html = ""; |
|
441 | + for ($i = 0; $i < count($peaces); $i++) { |
|
442 | + if (($i + 1) % 3 == 0) { |
|
443 | + continue; |
|
444 | + } |
|
445 | + $html .= (($i - 1) % 3 != 0) ? $this->killLineBreaks($peaces[$i]) : $peaces[$i]; |
|
446 | + } |
|
447 | + } |
|
448 | + |
|
449 | + /** |
|
450 | + * Remove obsolete link schema |
|
451 | + * |
|
452 | + * @param string $html |
|
453 | + * |
|
454 | + * @return void |
|
455 | + */ |
|
456 | + protected function removeLinkSchema(&$html) |
|
457 | + { |
|
458 | + $html = preg_replace("/<link rel=\"?schema.dc\"?.+?>/is", "", $html); |
|
459 | + } |
|
460 | + |
|
461 | + /** |
|
462 | + * Remove empty alt tags |
|
463 | + * |
|
464 | + * @param string $html |
|
465 | + * |
|
466 | + * @return void |
|
467 | + */ |
|
468 | + protected function removeEmptyAltAtr(&$html) |
|
469 | + { |
|
470 | + $html = str_replace("alt=\"\"", "", $html); |
|
471 | + } |
|
472 | + |
|
473 | + /** |
|
474 | + * Remove broken links in <a> tags |
|
475 | + * |
|
476 | + * @param string $html |
|
477 | + * |
|
478 | + * @return void |
|
479 | + */ |
|
480 | + protected function removeRealUrlBrokenRootLink(&$html) |
|
481 | + { |
|
482 | + $html = str_replace('href=".html"', 'href=""', $html); |
|
483 | + } |
|
484 | + |
|
485 | + /** |
|
486 | + * Remove all comments except the whitelisted comments |
|
487 | + * |
|
488 | + * @param string $html |
|
489 | + * |
|
490 | + * @return void |
|
491 | + */ |
|
492 | + protected function removeComments(&$html) |
|
493 | + { |
|
494 | + // match all styles, scripts and comments |
|
495 | + $matches = array(); |
|
496 | + preg_match_all('/(?s)((<!--.*?-->)|(<[ \n\r]*style[^>]*>.*?<[ \n\r]*\/style[^>]*>)|(<[ \n\r]*script[^>]*>.*?<[ \n\r]*\/script[^>]*>))/im', $html, $matches); |
|
497 | + foreach ($matches[0] as $tag) { |
|
498 | + if ($this->keepComment($tag) === false) { |
|
499 | + $html = str_replace($tag, '', $html); |
|
500 | + } |
|
501 | + } |
|
502 | + } |
|
503 | + |
|
504 | + /** |
|
505 | + * Check if a comment is defined to be kept in a pattern whiteListOfComments |
|
506 | + * |
|
507 | + * @param string $commentHtml |
|
508 | + * |
|
509 | + * @return boolean |
|
510 | + */ |
|
511 | + protected function keepComment($commentHtml) |
|
512 | + { |
|
513 | + // if not even a comment, skip this |
|
514 | + if (!preg_match('/^\<\!\-\-(.*?)\-\-\>$/usi', $commentHtml)) { |
|
515 | + return true; |
|
516 | + } |
|
517 | + |
|
518 | + // if not defined in white list |
|
519 | + if (!empty($this->whiteListCommentsPatterns)) { |
|
520 | + $commentHtml = str_replace("<!--", "", $commentHtml); |
|
521 | + $commentHtml = str_replace("-->", "", $commentHtml); |
|
522 | + $commentHtml = trim($commentHtml); |
|
523 | + foreach ($this->whiteListCommentsPatterns as $pattern) { |
|
524 | + if (preg_match($pattern, $commentHtml)) { |
|
525 | + return true; |
|
526 | + } |
|
527 | + } |
|
528 | + } |
|
529 | + return false; |
|
530 | + } |
|
531 | + |
|
532 | + /** |
|
533 | + * TYPO3 adds to each page a small script: |
|
534 | + * <script language="javascript"> |
|
535 | + * <!-- |
|
536 | + * browserName = navigator.appName; |
|
537 | + * browserVer = parseInt(navigator.appVersion); |
|
538 | + * var msie4 = (browserName == "Microsoft Internet Explorer" && browserVer >= 4); |
|
539 | + * if ((browserName == "Netscape" && browserVer >= 3) || msie4 || browserName=="Konqueror") {version = "n3";} else {version = "n2";} |
|
540 | + * function blurLink(theObject){ |
|
541 | + * if (msie4){theObject.blur();} |
|
542 | + * } |
|
543 | + * // --> |
|
544 | + * </script> |
|
545 | + * Obviously used for client-side browserdetection - but thats not necessary if your page doesn't use JS |
|
546 | + * |
|
547 | + * @param string $html |
|
548 | + * |
|
549 | + * @return void |
|
550 | + */ |
|
551 | + public function removeBlurScript(&$html) |
|
552 | + { |
|
553 | + if (strlen($html) < 100000) { |
|
554 | + $pattern = '/<script (type="text\/javascript"|language="javascript")>.+?Konqueror.+function blurLink.+theObject.blur.+?<\/script>/is'; |
|
555 | + $html = preg_replace($pattern, '', $html); // in head |
|
556 | + } |
|
557 | + $html = str_replace(' onfocus="blurLink(this);"', '', $html); // in body |
|
558 | + } |
|
559 | + |
|
560 | + /** |
|
561 | + * Remove the generator Tag |
|
562 | + * |
|
563 | + * @param string $html |
|
564 | + * |
|
565 | + * @return void |
|
566 | + */ |
|
567 | + public function removeGenerator(&$html) |
|
568 | + { |
|
569 | + $html = preg_replace('/<meta name=\"?generator\"?.+?>/is', '', $html); |
|
570 | + } |
|
571 | + |
|
572 | + /** |
|
573 | + * Include configured header comment in HTML content block |
|
574 | + * |
|
575 | + * @param $html |
|
576 | + */ |
|
577 | + public function includeHeaderComment(&$html) |
|
578 | + { |
|
579 | + if (!empty($this->headerComment)) { |
|
580 | + $html = preg_replace_callback( |
|
581 | + '/<meta http-equiv(.*)>/Usi', |
|
582 | + function ($matches) { |
|
583 | + return trim($matches[0] . $this->newline . $this->tab . $this->tab . '<!-- ' . $this->headerComment . '-->'); |
|
584 | + }, |
|
585 | + $html, |
|
586 | + 1 |
|
587 | + ); |
|
588 | + } |
|
589 | + } |
|
591 | 590 | } |
@@ -205,12 +205,12 @@ discard block |
||
205 | 205 | $functionalBoxElements = 'dd|dt|frameset|li|tbody|td|tfoot|th|thead|tr|colgroup'; |
206 | 206 | $usableBoxElements = 'applet|button|del|iframe|ins|map|object|script'; |
207 | 207 | $imagineBoxElements = 'html|body|head|meta|title|link|script|base|!--'; |
208 | - $allBoxLikeElements = '(?>' . $trueBoxElements . '|' . $functionalBoxElements . '|' . $usableBoxElements . '|' . $imagineBoxElements . ')'; |
|
208 | + $allBoxLikeElements = '(?>'.$trueBoxElements.'|'.$functionalBoxElements.'|'.$usableBoxElements.'|'.$imagineBoxElements.')'; |
|
209 | 209 | $esteticBoxLikeElements = '(?>html|head|body|meta name|title|div|table|h1|h2|h3|h4|h5|h6|p|form|pre|center|!--)'; |
210 | 210 | $structureBoxLikeElements = '(?>html|head|body|div|!--)'; |
211 | 211 | |
212 | 212 | // split html into it's elements |
213 | - $html_array_temp = preg_split('/(<(?:[^<>]+(?:"[^"]*"|\'[^\']*\')?)+>)/', $html, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY); |
|
213 | + $html_array_temp = preg_split('/(<(?:[^<>]+(?:"[^"]*"|\'[^\']*\')?)+>)/', $html, -1, PREG_SPLIT_DELIM_CAPTURE|PREG_SPLIT_NO_EMPTY); |
|
214 | 214 | // remove empty lines |
215 | 215 | $html_array = array(''); |
216 | 216 | $z = 1; |
@@ -237,20 +237,20 @@ discard block |
||
237 | 237 | $newline = true; |
238 | 238 | } elseif ($this->formatType == 2 && ( // minimalistic line break |
239 | 239 | # this element has a line break before itself |
240 | - preg_match('/<' . $structureBoxLikeElements . '(.*)>/Usi', $html_array[$x]) || preg_match('/<' . $structureBoxLikeElements . '(.*) \/>/Usi', $html_array[$x]) || # one element before is a element that has a line break after |
|
241 | - preg_match('/<\/' . $structureBoxLikeElements . '(.*)>/Usi', $html_array[$x - 1]) || substr($html_array[$x - 1], 0, 4) == '<!--' || preg_match('/<' . $structureBoxLikeElements . '(.*) \/>/Usi', $html_array[$x - 1])) |
|
240 | + preg_match('/<'.$structureBoxLikeElements.'(.*)>/Usi', $html_array[$x]) || preg_match('/<'.$structureBoxLikeElements.'(.*) \/>/Usi', $html_array[$x]) || # one element before is a element that has a line break after |
|
241 | + preg_match('/<\/'.$structureBoxLikeElements.'(.*)>/Usi', $html_array[$x - 1]) || substr($html_array[$x - 1], 0, 4) == '<!--' || preg_match('/<'.$structureBoxLikeElements.'(.*) \/>/Usi', $html_array[$x - 1])) |
|
242 | 242 | ) { |
243 | 243 | $newline = true; |
244 | 244 | } elseif ($this->formatType == 3 && ( // aestetic line break |
245 | 245 | # this element has a line break before itself |
246 | - preg_match('/<' . $esteticBoxLikeElements . '(.*)>/Usi', $html_array[$x]) || preg_match('/<' . $esteticBoxLikeElements . '(.*) \/>/Usi', $html_array[$x]) || # one element before is a element that has a line break after |
|
247 | - preg_match('/<\/' . $esteticBoxLikeElements . '(.*)>/Usi', $html_array[$x - 1]) || substr($html_array[$x - 1], 0, 4) == '<!--' || preg_match('/<' . $esteticBoxLikeElements . '(.*) \/>/Usi', $html_array[$x - 1])) |
|
246 | + preg_match('/<'.$esteticBoxLikeElements.'(.*)>/Usi', $html_array[$x]) || preg_match('/<'.$esteticBoxLikeElements.'(.*) \/>/Usi', $html_array[$x]) || # one element before is a element that has a line break after |
|
247 | + preg_match('/<\/'.$esteticBoxLikeElements.'(.*)>/Usi', $html_array[$x - 1]) || substr($html_array[$x - 1], 0, 4) == '<!--' || preg_match('/<'.$esteticBoxLikeElements.'(.*) \/>/Usi', $html_array[$x - 1])) |
|
248 | 248 | ) { |
249 | 249 | $newline = true; |
250 | 250 | } elseif ($this->formatType >= 4 && ( // logical line break |
251 | 251 | # this element has a line break before itself |
252 | - preg_match('/<' . $allBoxLikeElements . '(.*)>/Usi', $html_array[$x]) || preg_match('/<' . $allBoxLikeElements . '(.*) \/>/Usi', $html_array[$x]) || # one element before is a element that has a line break after |
|
253 | - preg_match('/<\/' . $allBoxLikeElements . '(.*)>/Usi', $html_array[$x - 1]) || substr($html_array[$x - 1], 0, 4) == '<!--' || preg_match('/<' . $allBoxLikeElements . '(.*) \/>/Usi', $html_array[$x - 1])) |
|
252 | + preg_match('/<'.$allBoxLikeElements.'(.*)>/Usi', $html_array[$x]) || preg_match('/<'.$allBoxLikeElements.'(.*) \/>/Usi', $html_array[$x]) || # one element before is a element that has a line break after |
|
253 | + preg_match('/<\/'.$allBoxLikeElements.'(.*)>/Usi', $html_array[$x - 1]) || substr($html_array[$x - 1], 0, 4) == '<!--' || preg_match('/<'.$allBoxLikeElements.'(.*) \/>/Usi', $html_array[$x - 1])) |
|
254 | 254 | ) { |
255 | 255 | $newline = true; |
256 | 256 | } |
@@ -303,7 +303,7 @@ discard block |
||
303 | 303 | |
304 | 304 | // include debug comment at the end |
305 | 305 | if ($tabs != 0 && $this->debugComment === true) { |
306 | - $html .= '<!--' . $tabs . " open elements found-->\r\n"; |
|
306 | + $html .= '<!--'.$tabs." open elements found-->\r\n"; |
|
307 | 307 | } |
308 | 308 | } |
309 | 309 | |
@@ -437,7 +437,7 @@ discard block |
||
437 | 437 | 'textarea', |
438 | 438 | 'pre' |
439 | 439 | ); // eventuell auch: span, script, style |
440 | - $peaces = preg_split('#(<(' . implode('|', $splitArray) . ').*>.*</\2>)#Uis', $html, -1, PREG_SPLIT_DELIM_CAPTURE); |
|
440 | + $peaces = preg_split('#(<('.implode('|', $splitArray).').*>.*</\2>)#Uis', $html, -1, PREG_SPLIT_DELIM_CAPTURE); |
|
441 | 441 | $html = ""; |
442 | 442 | for ($i = 0; $i < count($peaces); $i++) { |
443 | 443 | if (($i + 1) % 3 == 0) { |
@@ -580,8 +580,8 @@ discard block |
||
580 | 580 | if (!empty($this->headerComment)) { |
581 | 581 | $html = preg_replace_callback( |
582 | 582 | '/<meta http-equiv(.*)>/Usi', |
583 | - function ($matches) { |
|
584 | - return trim($matches[0] . $this->newline . $this->tab . $this->tab . '<!-- ' . $this->headerComment . '-->'); |
|
583 | + function($matches) { |
|
584 | + return trim($matches[0].$this->newline.$this->tab.$this->tab.'<!-- '.$this->headerComment.'-->'); |
|
585 | 585 | }, |
586 | 586 | $html, |
587 | 587 | 1 |
@@ -11,108 +11,108 @@ |
||
11 | 11 | class FrontendHook implements \TYPO3\CMS\Core\SingletonInterface |
12 | 12 | { |
13 | 13 | |
14 | - /** |
|
15 | - * @var \HTML\Sourceopt\Service\CleanHtmlService |
|
16 | - * @inject |
|
17 | - */ |
|
18 | - protected $cleanHtmlService = null; |
|
14 | + /** |
|
15 | + * @var \HTML\Sourceopt\Service\CleanHtmlService |
|
16 | + * @inject |
|
17 | + */ |
|
18 | + protected $cleanHtmlService = null; |
|
19 | 19 | |
20 | - /** |
|
21 | - * Initialize needed variables |
|
22 | - */ |
|
23 | - public function __construct() |
|
24 | - { |
|
25 | - $this->initialize(); |
|
26 | - } |
|
20 | + /** |
|
21 | + * Initialize needed variables |
|
22 | + */ |
|
23 | + public function __construct() |
|
24 | + { |
|
25 | + $this->initialize(); |
|
26 | + } |
|
27 | 27 | |
28 | - /** |
|
29 | - * Hook for adjusting the HTML <body> output |
|
30 | - * |
|
31 | - * @param \TYPO3\CMS\Frontend\Controller\TypoScriptFrontendController $typoScriptFrontend |
|
32 | - * |
|
33 | - * @return void |
|
34 | - */ |
|
35 | - public function clean(\TYPO3\CMS\Frontend\Controller\TypoScriptFrontendController &$typoScriptFrontend) |
|
36 | - { |
|
37 | - if ($this->cleanHtmlService instanceof CleanHtmlService) { |
|
38 | - $configuration = $typoScriptFrontend->config['config']['sourceopt.']; |
|
39 | - $this->cleanHtmlService->clean($typoScriptFrontend->content, $configuration); |
|
40 | - } |
|
41 | - } |
|
28 | + /** |
|
29 | + * Hook for adjusting the HTML <body> output |
|
30 | + * |
|
31 | + * @param \TYPO3\CMS\Frontend\Controller\TypoScriptFrontendController $typoScriptFrontend |
|
32 | + * |
|
33 | + * @return void |
|
34 | + */ |
|
35 | + public function clean(\TYPO3\CMS\Frontend\Controller\TypoScriptFrontendController &$typoScriptFrontend) |
|
36 | + { |
|
37 | + if ($this->cleanHtmlService instanceof CleanHtmlService) { |
|
38 | + $configuration = $typoScriptFrontend->config['config']['sourceopt.']; |
|
39 | + $this->cleanHtmlService->clean($typoScriptFrontend->content, $configuration); |
|
40 | + } |
|
41 | + } |
|
42 | 42 | |
43 | - /** |
|
44 | - * Clean cache content from FrontendRenderer |
|
45 | - * hook is called after Caching! |
|
46 | - * => for modification of pages with COA_/USER_INT objects. |
|
47 | - * |
|
48 | - * @param array $parameters |
|
49 | - * |
|
50 | - * @return void |
|
51 | - */ |
|
52 | - public function cleanUncachedContent(&$parameters) |
|
53 | - { |
|
54 | - $tsfe = &$parameters['pObj']; |
|
55 | - if ($tsfe instanceof \TYPO3\CMS\Frontend\Controller\TypoScriptFrontendController) { |
|
56 | - if ($tsfe->isINTincScript() === true) { |
|
57 | - $this->clean($tsfe); |
|
58 | - } |
|
59 | - } |
|
60 | - } |
|
43 | + /** |
|
44 | + * Clean cache content from FrontendRenderer |
|
45 | + * hook is called after Caching! |
|
46 | + * => for modification of pages with COA_/USER_INT objects. |
|
47 | + * |
|
48 | + * @param array $parameters |
|
49 | + * |
|
50 | + * @return void |
|
51 | + */ |
|
52 | + public function cleanUncachedContent(&$parameters) |
|
53 | + { |
|
54 | + $tsfe = &$parameters['pObj']; |
|
55 | + if ($tsfe instanceof \TYPO3\CMS\Frontend\Controller\TypoScriptFrontendController) { |
|
56 | + if ($tsfe->isINTincScript() === true) { |
|
57 | + $this->clean($tsfe); |
|
58 | + } |
|
59 | + } |
|
60 | + } |
|
61 | 61 | |
62 | - /** |
|
63 | - * Clean cache content from FrontendRenderer |
|
64 | - * hook is called before Caching! |
|
65 | - * => for modification of pages on their way in the cache. |
|
66 | - * |
|
67 | - * @param array $parameters |
|
68 | - * |
|
69 | - * @return void |
|
70 | - */ |
|
71 | - public function cleanCachedContent(&$parameters) |
|
72 | - { |
|
73 | - $tsfe = &$parameters['pObj']; |
|
74 | - if ($tsfe instanceof \TYPO3\CMS\Frontend\Controller\TypoScriptFrontendController) { |
|
75 | - if ($tsfe->isINTincScript() === false) { |
|
76 | - $this->clean($tsfe); |
|
77 | - } |
|
78 | - } |
|
79 | - } |
|
62 | + /** |
|
63 | + * Clean cache content from FrontendRenderer |
|
64 | + * hook is called before Caching! |
|
65 | + * => for modification of pages on their way in the cache. |
|
66 | + * |
|
67 | + * @param array $parameters |
|
68 | + * |
|
69 | + * @return void |
|
70 | + */ |
|
71 | + public function cleanCachedContent(&$parameters) |
|
72 | + { |
|
73 | + $tsfe = &$parameters['pObj']; |
|
74 | + if ($tsfe instanceof \TYPO3\CMS\Frontend\Controller\TypoScriptFrontendController) { |
|
75 | + if ($tsfe->isINTincScript() === false) { |
|
76 | + $this->clean($tsfe); |
|
77 | + } |
|
78 | + } |
|
79 | + } |
|
80 | 80 | |
81 | - /** |
|
82 | - * Initialize needed variables |
|
83 | - * |
|
84 | - * @return void |
|
85 | - * |
|
86 | - * @throws \TYPO3\CMS\Frontend\Exception |
|
87 | - */ |
|
88 | - protected function initialize() |
|
89 | - { |
|
90 | - if (!($GLOBALS['TSFE'] instanceof \TYPO3\CMS\Frontend\Controller\TypoScriptFrontendController)) { |
|
91 | - throw new \TYPO3\CMS\Frontend\Exception('No frontend class rendered!'); |
|
92 | - } |
|
93 | - if ($this->cleanHtmlService === null) { |
|
94 | - /** @var CleanHtmlService $cleanHtmlService */ |
|
95 | - $this->cleanHtmlService = $this->getInstance('HTML\\Sourceopt\\Service\\CleanHtmlService'); |
|
96 | - } |
|
97 | - } |
|
81 | + /** |
|
82 | + * Initialize needed variables |
|
83 | + * |
|
84 | + * @return void |
|
85 | + * |
|
86 | + * @throws \TYPO3\CMS\Frontend\Exception |
|
87 | + */ |
|
88 | + protected function initialize() |
|
89 | + { |
|
90 | + if (!($GLOBALS['TSFE'] instanceof \TYPO3\CMS\Frontend\Controller\TypoScriptFrontendController)) { |
|
91 | + throw new \TYPO3\CMS\Frontend\Exception('No frontend class rendered!'); |
|
92 | + } |
|
93 | + if ($this->cleanHtmlService === null) { |
|
94 | + /** @var CleanHtmlService $cleanHtmlService */ |
|
95 | + $this->cleanHtmlService = $this->getInstance('HTML\\Sourceopt\\Service\\CleanHtmlService'); |
|
96 | + } |
|
97 | + } |
|
98 | 98 | |
99 | - /** |
|
100 | - * Create instance when no object manager initiated |
|
101 | - * |
|
102 | - * @param string $class |
|
103 | - * |
|
104 | - * @return object given class |
|
105 | - */ |
|
106 | - protected function getInstance($class) |
|
107 | - { |
|
108 | - static $objectManager; |
|
109 | - if (!($objectManager instanceof \TYPO3\CMS\Extbase\Object\ObjectManager)) { |
|
110 | - $objectManager = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance('TYPO3\\CMS\\Extbase\\Object\\ObjectManager'); |
|
111 | - } |
|
99 | + /** |
|
100 | + * Create instance when no object manager initiated |
|
101 | + * |
|
102 | + * @param string $class |
|
103 | + * |
|
104 | + * @return object given class |
|
105 | + */ |
|
106 | + protected function getInstance($class) |
|
107 | + { |
|
108 | + static $objectManager; |
|
109 | + if (!($objectManager instanceof \TYPO3\CMS\Extbase\Object\ObjectManager)) { |
|
110 | + $objectManager = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance('TYPO3\\CMS\\Extbase\\Object\\ObjectManager'); |
|
111 | + } |
|
112 | 112 | |
113 | - if ($objectManager instanceof \TYPO3\CMS\Extbase\Object\ObjectManager) { |
|
114 | - return $objectManager->get($class); |
|
115 | - } |
|
116 | - return null; |
|
117 | - } |
|
113 | + if ($objectManager instanceof \TYPO3\CMS\Extbase\Object\ObjectManager) { |
|
114 | + return $objectManager->get($class); |
|
115 | + } |
|
116 | + return null; |
|
117 | + } |
|
118 | 118 | } |
@@ -32,7 +32,7 @@ |
||
32 | 32 | * |
33 | 33 | * @return void |
34 | 34 | */ |
35 | - public function clean(\TYPO3\CMS\Frontend\Controller\TypoScriptFrontendController &$typoScriptFrontend) |
|
35 | + public function clean(\TYPO3\CMS\Frontend\Controller\TypoScriptFrontendController&$typoScriptFrontend) |
|
36 | 36 | { |
37 | 37 | if ($this->cleanHtmlService instanceof CleanHtmlService) { |
38 | 38 | $configuration = $typoScriptFrontend->config['config']['sourceopt.']; |