Completed
Push — master ( 5ddeab...98568e )
by Tim
14:04
created
Classes/Manipulation/RemoveGenerator.php 1 patch
Indentation   +11 added lines, -11 removed lines patch added patch discarded remove patch
@@ -14,16 +14,16 @@
 block discarded – undo
14 14
  */
15 15
 class RemoveGenerator implements ManipulationInterface
16 16
 {
17
-    /**
18
-     * @param string $html          The original HTML
19
-     * @param array  $configuration Configuration
20
-     *
21
-     * @return string the manipulated HTML
22
-     */
23
-    public function manipulate(string $html, array $configuration = []):string
24
-    {
25
-        $regex = '<meta name=["\']?generator["\']? [^>]+>';
17
+	/**
18
+	 * @param string $html          The original HTML
19
+	 * @param array  $configuration Configuration
20
+	 *
21
+	 * @return string the manipulated HTML
22
+	 */
23
+	public function manipulate(string $html, array $configuration = []):string
24
+	{
25
+		$regex = '<meta name=["\']?generator["\']? [^>]+>';
26 26
 
27
-        return preg_replace('/'.$regex.'/is', '', $html);
28
-    }
27
+		return preg_replace('/'.$regex.'/is', '', $html);
28
+	}
29 29
 }
Please login to merge, or discard this patch.
Classes/Manipulation/ManipulationInterface.php 1 patch
Indentation   +7 added lines, -7 removed lines patch added patch discarded remove patch
@@ -14,11 +14,11 @@
 block discarded – undo
14 14
  */
15 15
 interface ManipulationInterface
16 16
 {
17
-    /**
18
-     * @param string $html          The original HTML
19
-     * @param array  $configuration Configuration
20
-     *
21
-     * @return string the manipulated HTML
22
-     */
23
-    public function manipulate(string $html, array $configuration = []):string;
17
+	/**
18
+	 * @param string $html          The original HTML
19
+	 * @param array  $configuration Configuration
20
+	 *
21
+	 * @return string the manipulated HTML
22
+	 */
23
+	public function manipulate(string $html, array $configuration = []):string;
24 24
 }
Please login to merge, or discard this patch.
Classes/Manipulation/RemoveComments.php 1 patch
Indentation   +57 added lines, -57 removed lines patch added patch discarded remove patch
@@ -14,67 +14,67 @@
 block discarded – undo
14 14
  */
15 15
 class RemoveComments implements ManipulationInterface
16 16
 {
17
-    /**
18
-     * Patterns for white-listing comments inside content.
19
-     *
20
-     * @var array
21
-     */
22
-    protected $whiteListCommentsPatterns = [];
17
+	/**
18
+	 * Patterns for white-listing comments inside content.
19
+	 *
20
+	 * @var array
21
+	 */
22
+	protected $whiteListCommentsPatterns = [];
23 23
 
24
-    /**
25
-     * @param string $html          The original HTML
26
-     * @param array  $configuration Configuration
27
-     *
28
-     * @return string the manipulated HTML
29
-     */
30
-    public function manipulate(string $html, array $configuration = []):string
31
-    {
32
-        if (isset($configuration['keep.'])) {
33
-            $this->whiteListCommentsPatterns = $configuration['keep.'];
34
-        }
24
+	/**
25
+	 * @param string $html          The original HTML
26
+	 * @param array  $configuration Configuration
27
+	 *
28
+	 * @return string the manipulated HTML
29
+	 */
30
+	public function manipulate(string $html, array $configuration = []):string
31
+	{
32
+		if (isset($configuration['keep.'])) {
33
+			$this->whiteListCommentsPatterns = $configuration['keep.'];
34
+		}
35 35
 
36
-        // match all comments, styles and scripts
37
-        $matches = [];
38
-        preg_match_all(
39
-            '/(?s)((<!--.*?-->)|(<[ \n\r]*style[^>]*>.*?<[ \n\r]*\/style[^>]*>)|(<[ \n\r]*script[^>]*>.*?<[ \n\r]*\/script[^>]*>))/im',
40
-            $html,
41
-            $matches
42
-        );
43
-        foreach ($matches[0] as $tag) {
44
-            if (false === $this->keepComment($tag)) {
45
-                $html = str_replace($tag, '', $html);
46
-            }
47
-        }
36
+		// match all comments, styles and scripts
37
+		$matches = [];
38
+		preg_match_all(
39
+			'/(?s)((<!--.*?-->)|(<[ \n\r]*style[^>]*>.*?<[ \n\r]*\/style[^>]*>)|(<[ \n\r]*script[^>]*>.*?<[ \n\r]*\/script[^>]*>))/im',
40
+			$html,
41
+			$matches
42
+		);
43
+		foreach ($matches[0] as $tag) {
44
+			if (false === $this->keepComment($tag)) {
45
+				$html = str_replace($tag, '', $html);
46
+			}
47
+		}
48 48
 
49
-        return $html;
50
-    }
49
+		return $html;
50
+	}
51 51
 
52
-    /**
53
-     * Check if a comment is defined to be kept in a pattern whiteListOfComments.
54
-     *
55
-     * @param string $commentHtml
56
-     *
57
-     * @return bool
58
-     */
59
-    protected function keepComment($commentHtml)
60
-    {
61
-        // if not even a comment, skip this
62
-        if (!preg_match('/^\<\!\-\-(.*?)\-\-\>$/usi', $commentHtml)) {
63
-            return true;
64
-        }
52
+	/**
53
+	 * Check if a comment is defined to be kept in a pattern whiteListOfComments.
54
+	 *
55
+	 * @param string $commentHtml
56
+	 *
57
+	 * @return bool
58
+	 */
59
+	protected function keepComment($commentHtml)
60
+	{
61
+		// if not even a comment, skip this
62
+		if (!preg_match('/^\<\!\-\-(.*?)\-\-\>$/usi', $commentHtml)) {
63
+			return true;
64
+		}
65 65
 
66
-        // if not defined in white list
67
-        if (!empty($this->whiteListCommentsPatterns)) {
68
-            $commentHtml = str_replace('<!--', '', $commentHtml);
69
-            $commentHtml = str_replace('-->', '', $commentHtml);
70
-            $commentHtml = trim($commentHtml);
71
-            foreach ($this->whiteListCommentsPatterns as $pattern) {
72
-                if (!empty($pattern) && preg_match($pattern, $commentHtml)) {
73
-                    return true;
74
-                }
75
-            }
76
-        }
66
+		// if not defined in white list
67
+		if (!empty($this->whiteListCommentsPatterns)) {
68
+			$commentHtml = str_replace('<!--', '', $commentHtml);
69
+			$commentHtml = str_replace('-->', '', $commentHtml);
70
+			$commentHtml = trim($commentHtml);
71
+			foreach ($this->whiteListCommentsPatterns as $pattern) {
72
+				if (!empty($pattern) && preg_match($pattern, $commentHtml)) {
73
+					return true;
74
+				}
75
+			}
76
+		}
77 77
 
78
-        return false;
79
-    }
78
+		return false;
79
+	}
80 80
 }
Please login to merge, or discard this patch.
Classes/Service/CleanHtmlService.php 1 patch
Indentation   +388 added lines, -388 removed lines patch added patch discarded remove patch
@@ -17,392 +17,392 @@
 block discarded – undo
17 17
  */
18 18
 class CleanHtmlService implements SingletonInterface
19 19
 {
20
-    /**
21
-     * Enable Debug comment in footer.
22
-     *
23
-     * @var bool
24
-     */
25
-    protected $debugComment = false;
26
-
27
-    /**
28
-     * Format Type.
29
-     *
30
-     * @var int
31
-     */
32
-    protected $formatType = 0;
33
-
34
-    /**
35
-     * Tab character.
36
-     *
37
-     * @var string
38
-     */
39
-    protected $tab = "\t";
40
-
41
-    /**
42
-     * Newline character.
43
-     *
44
-     * @var string
45
-     */
46
-    protected $newline = "\n";
47
-
48
-    /**
49
-     * Configured extra header comment.
50
-     *
51
-     * @var string
52
-     */
53
-    protected $headerComment = '';
54
-
55
-    /**
56
-     * Empty space char.
57
-     *
58
-     * @var string
59
-     */
60
-    protected $emptySpaceChar = ' ';
61
-
62
-    /**
63
-     * Set variables based on given config.
64
-     */
65
-    public function setVariables(array $config): void
66
-    {
67
-        if (!empty($config)) {
68
-            if ($config['formatHtml'] && is_numeric($config['formatHtml'])) {
69
-                $this->formatType = (int) $config['formatHtml'];
70
-            }
71
-
72
-            if ($config['formatHtml.']['tabSize'] && is_numeric($config['formatHtml.']['tabSize'])) {
73
-                $this->tab = str_pad('', (int) $config['formatHtml.']['tabSize'], ' ');
74
-            }
75
-
76
-            if (isset($config['formatHtml.']['debugComment'])) {
77
-                $this->debugComment = (bool) $config['formatHtml.']['debugComment'];
78
-            }
79
-
80
-            if (isset($config['headerComment'])) {
81
-                $this->headerComment = $config['headerComment'];
82
-            }
83
-
84
-            if (isset($config['dropEmptySpaceChar']) && (bool) $config['dropEmptySpaceChar']) {
85
-                $this->emptySpaceChar = '';
86
-            }
87
-        }
88
-    }
89
-
90
-    /**
91
-     * Clean given HTML with formatter.
92
-     *
93
-     * @return string
94
-     */
95
-    public function clean(string $html, array $config = [])
96
-    {
97
-        if (!empty($config)) {
98
-            $this->setVariables($config);
99
-        }
100
-        // convert line-breaks to UNIX
101
-        $this->convNlOs($html);
102
-
103
-        $manipulations = [];
104
-
105
-        if (isset($config['removeGenerator']) && (bool) $config['removeGenerator']) {
106
-            $manipulations['removeGenerator'] = GeneralUtility::makeInstance(RemoveGenerator::class);
107
-        }
108
-
109
-        if (isset($config['removeComments']) && (bool) $config['removeComments']) {
110
-            $manipulations['removeComments'] = GeneralUtility::makeInstance(RemoveComments::class);
111
-        }
112
-
113
-        if (!empty($this->headerComment)) {
114
-            $this->includeHeaderComment($html);
115
-        }
116
-
117
-        foreach ($manipulations as $key => $manipulation) {
118
-            /** @var ManipulationInterface $manipulation */
119
-            $configuration = isset($config[$key.'.']) && \is_array($config[$key.'.']) ? $config[$key.'.'] : [];
120
-            $html = $manipulation->manipulate($html, $configuration);
121
-        }
122
-
123
-        // cleanup HTML5 self-closing elements
124
-        if (!isset($GLOBALS['TSFE']->config['config']['doctype'])
125
-            || 'x' !== substr($GLOBALS['TSFE']->config['config']['doctype'], 0, 1)) {
126
-            $html = preg_replace(
127
-                '/<((?:area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)\s[^>]+?)\s?\/>/',
128
-                '<$1>',
129
-                $html
130
-            );
131
-        }
132
-
133
-        if ($this->formatType > 0) {
134
-            $html = $this->formatHtml($html);
135
-        }
136
-        // remove white space after line ending
137
-        $this->rTrimLines($html);
138
-
139
-        // recover line-breaks
140
-        if (Environment::isWindows()) {
141
-            $html = str_replace($this->newline, "\r\n", $html);
142
-        }
143
-
144
-        return $html;
145
-    }
146
-
147
-    /**
148
-     * Formats the (X)HTML code:
149
-     *  - tabs according to the hierarchy of the tags
150
-     *  - removes empty spaces between tags
151
-     *  - removes linebreaks within tags (spares where necessary: pre, textarea, comments, ..)
152
-     *  choose from five options:
153
-     *    0 => off
154
-     *    1 => no line break at all  (code in one line)
155
-     *    2 => minimalistic line breaks (structure defining box-elements)
156
-     *    3 => aesthetic line breaks (important box-elements)
157
-     *    4 => logic line breaks (all box-elements)
158
-     *    5 => max line breaks (all elements).
159
-     */
160
-    protected function formatHtml(string $html):string
161
-    {
162
-        // Save original formatted pre, textarea, comments, styles and scripts & replace them with markers
163
-        preg_match_all(
164
-            '/(?s)((<!--.*?-->)|(<[ \n\r]*pre[^>]*>.*?<[ \n\r]*\/pre[^>]*>)|(<[ \n\r]*textarea[^>]*>.*?<[ \n\r]*\/textarea[^>]*>)|(<[ \n\r]*style[^>]*>.*?<[ \n\r]*\/style[^>]*>)|(<[ \n\r]*script[^>]*>.*?<[ \n\r]*\/script[^>]*>))/im',
165
-            $html,
166
-            $matches
167
-        );
168
-        $noFormat = $matches[0]; // do not format these block elements
169
-        for ($i = 0; $i < \count($noFormat); ++$i) {
170
-            $html = str_replace($noFormat[$i], "\n<!-- ELEMENT {$i} -->", $html);
171
-        }
172
-
173
-        // define box elements for formatting
174
-        $trueBoxElements = 'address|blockquote|center|dir|div|dl|fieldset|form|h1|h2|h3|h4|h5|h6|hr|isindex|menu|noframes|noscript|ol|p|pre|table|ul|article|aside|details|figcaption|figure|footer|header|hgroup|menu|nav|section';
175
-        $functionalBoxElements = 'dd|dt|frameset|li|tbody|td|tfoot|th|thead|tr|colgroup';
176
-        $usableBoxElements = 'applet|button|del|iframe|ins|map|object|script';
177
-        $imagineBoxElements = 'html|body|head|meta|title|link|script|base|!--';
178
-        $allBoxLikeElements = '(?>'.$trueBoxElements.'|'.$functionalBoxElements.'|'.$usableBoxElements.'|'.$imagineBoxElements.')';
179
-        $esteticBoxLikeElements = '(?>html|head|body|meta name|title|div|table|h1|h2|h3|h4|h5|h6|p|form|pre|center|!--)';
180
-        $structureBoxLikeElements = '(?>html|head|body|div|!--)';
181
-
182
-        // split html into its elements
183
-        $htmlArrayTemp = preg_split(
184
-            '/(<(?:[^<>]+(?:"[^"]*"|\'[^\']*\')?)+>)/',
185
-            $html,
186
-            -1,
187
-            \PREG_SPLIT_DELIM_CAPTURE | \PREG_SPLIT_NO_EMPTY
188
-        );
189
-
190
-        if (false === $htmlArrayTemp) {
191
-            // Restore saved comments, styles and scripts
192
-            for ($i = 0; $i < \count($noFormat); ++$i) {
193
-                $html = str_replace("<!-- ELEMENT {$i} -->", $noFormat[$i], $html);
194
-            }
195
-
196
-            return $html;
197
-        }
198
-        // remove empty lines
199
-        $htmlArray = [''];
200
-        $index = 1;
201
-        for ($x = 0; $x < \count($htmlArrayTemp); ++$x) {
202
-            $text = trim($htmlArrayTemp[$x]);
203
-            $htmlArray[$index] = '' !== $text ? $htmlArrayTemp[$x] : $this->emptySpaceChar;
204
-            ++$index;
205
-        }
206
-
207
-        // rebuild html
208
-        $html = '';
209
-        $tabs = 0;
210
-        for ($x = 0; $x < \count($htmlArray); ++$x) {
211
-            $htmlArrayBefore = $htmlArray[$x - 1] ?? '';
212
-            $htmlArrayCurrent = $htmlArray[$x] ?? '';
213
-
214
-            // check if the element should stand in a new line
215
-            $newline = false;
216
-            if ('<?xml' == substr($htmlArrayBefore, 0, 5)) {
217
-                $newline = true;
218
-            } elseif (2 == $this->formatType && ( // minimalistic line break
219
-                // this element has a line break before itself
220
-                preg_match(
221
-                    '/<'.$structureBoxLikeElements.'(.*)>/Usi',
222
-                    $htmlArrayCurrent
223
-                ) || preg_match(
224
-                    '/<'.$structureBoxLikeElements.'(.*) \/>/Usi',
225
-                    $htmlArrayCurrent
226
-                ) // one element before is a element that has a line break after
227
-                || preg_match(
228
-                    '/<\/'.$structureBoxLikeElements.'(.*)>/Usi',
229
-                    $htmlArrayBefore
230
-                ) || '<!--' == substr(
231
-                    $htmlArrayBefore,
232
-                    0,
233
-                    4
234
-                ) || preg_match('/<'.$structureBoxLikeElements.'(.*) \/>/Usi', $htmlArrayBefore))
235
-            ) {
236
-                $newline = true;
237
-            } elseif (3 == $this->formatType && ( // aesthetic line break
238
-                // this element has a line break before itself
239
-                preg_match(
240
-                    '/<'.$esteticBoxLikeElements.'(.*)>/Usi',
241
-                    $htmlArrayCurrent
242
-                ) || preg_match(
243
-                    '/<'.$esteticBoxLikeElements.'(.*) \/>/Usi',
244
-                    $htmlArrayCurrent
245
-                ) // one element before is a element that has a line break after
246
-                || preg_match('/<\/'.$esteticBoxLikeElements.'(.*)>/Usi', $htmlArrayBefore) || '<!--' == substr(
247
-                    $htmlArrayBefore,
248
-                    0,
249
-                    4
250
-                ) || preg_match('/<'.$esteticBoxLikeElements.'(.*) \/>/Usi', $htmlArrayBefore))
251
-            ) {
252
-                $newline = true;
253
-            } elseif ($this->formatType >= 4 && ( // logical line break
254
-                // this element has a line break before itself
255
-                preg_match(
256
-                    '/<'.$allBoxLikeElements.'(.*)>/Usi',
257
-                    $htmlArrayCurrent
258
-                ) || preg_match(
259
-                    '/<'.$allBoxLikeElements.'(.*) \/>/Usi',
260
-                    $htmlArrayCurrent
261
-                ) // one element before is a element that has a line break after
262
-                || preg_match('/<\/'.$allBoxLikeElements.'(.*)>/Usi', $htmlArrayBefore) || '<!--' == substr(
263
-                    $htmlArrayBefore,
264
-                    0,
265
-                    4
266
-                ) || preg_match('/<'.$allBoxLikeElements.'(.*) \/>/Usi', $htmlArrayBefore))
267
-            ) {
268
-                $newline = true;
269
-            }
270
-
271
-            // count down a tab
272
-            if ('</' == substr($htmlArrayCurrent, 0, 2)) {
273
-                --$tabs;
274
-            }
275
-
276
-            // add tabs and line breaks in front of the current tag
277
-            if ($newline) {
278
-                $html .= $this->newline;
279
-                for ($y = 0; $y < $tabs; ++$y) {
280
-                    $html .= $this->tab;
281
-                }
282
-            }
283
-
284
-            // remove white spaces and line breaks and add current tag to the html-string
285
-            if ('<![CDATA[' == substr($htmlArrayCurrent, 0, 9) // remove multiple white space in CDATA / XML
286
-                || '<?xml' == substr($htmlArrayCurrent, 0, 5)
287
-            ) {
288
-                $html .= $this->killWhiteSpace($htmlArrayCurrent);
289
-            } else { // remove all line breaks
290
-                $html .= $this->killLineBreaks($htmlArrayCurrent);
291
-            }
292
-
293
-            // count up a tab
294
-            if ('<' == substr($htmlArrayCurrent, 0, 1) && '/' != substr($htmlArrayCurrent, 1, 1)) {
295
-                if (' ' !== substr($htmlArrayCurrent, 1, 1)
296
-                    && 'img' !== substr($htmlArrayCurrent, 1, 3)
297
-                    && 'source' !== substr($htmlArrayCurrent, 1, 6)
298
-                    && 'br' !== substr($htmlArrayCurrent, 1, 2)
299
-                    && 'hr' !== substr($htmlArrayCurrent, 1, 2)
300
-                    && 'input' !== substr($htmlArrayCurrent, 1, 5)
301
-                    && 'link' !== substr($htmlArrayCurrent, 1, 4)
302
-                    && 'meta' !== substr($htmlArrayCurrent, 1, 4)
303
-                    && 'col ' !== substr($htmlArrayCurrent, 1, 4)
304
-                    && 'frame' !== substr($htmlArrayCurrent, 1, 5)
305
-                    && 'isindex' !== substr($htmlArrayCurrent, 1, 7)
306
-                    && 'param' !== substr($htmlArrayCurrent, 1, 5)
307
-                    && 'area' !== substr($htmlArrayCurrent, 1, 4)
308
-                    && 'base' !== substr($htmlArrayCurrent, 1, 4)
309
-                    && '<!' !== substr($htmlArrayCurrent, 0, 2)
310
-                    && '<?xml' !== substr($htmlArrayCurrent, 0, 5)
311
-                ) {
312
-                    ++$tabs;
313
-                }
314
-            }
315
-        }
316
-
317
-        // Remove empty lines
318
-        if ($this->formatType > 1) {
319
-            $this->removeEmptyLines($html);
320
-        }
321
-
322
-        // Restore saved comments, styles and scripts
323
-        for ($i = 0; $i < \count($noFormat); ++$i) {
324
-            $html = str_replace("<!-- ELEMENT {$i} -->", $noFormat[$i], $html);
325
-        }
326
-
327
-        // include debug comment at the end
328
-        if (0 != $tabs && true === $this->debugComment) {
329
-            $html .= "<!-- {$tabs} open elements found -->";
330
-        }
331
-
332
-        return $html;
333
-    }
334
-
335
-    /**
336
-     * Remove ALL line breaks and multiple white space.
337
-     *
338
-     * @param string $html
339
-     *
340
-     * @return string
341
-     */
342
-    protected function killLineBreaks($html)
343
-    {
344
-        $html = str_replace($this->newline, '', $html);
345
-
346
-        return preg_replace('/\s\s+/u', ' ', $html);
347
-        // ? return preg_replace('/\n|\s+(\s)/u', '$1', $html);
348
-    }
349
-
350
-    /**
351
-     * Remove multiple white space, keeps line breaks.
352
-     */
353
-    protected function killWhiteSpace(string $html): string
354
-    {
355
-        $temp = explode($this->newline, $html);
356
-        for ($i = 0; $i < \count($temp); ++$i) {
357
-            if (!trim($temp[$i])) {
358
-                unset($temp[$i]);
359
-                continue;
360
-            }
361
-
362
-            $temp[$i] = trim($temp[$i]);
363
-            $temp[$i] = preg_replace('/\s\s+/', ' ', $temp[$i]);
364
-        }
365
-
366
-        return implode($this->newline, $temp);
367
-    }
368
-
369
-    /**
370
-     * Remove white space at the end of lines, keeps other white space and line breaks.
371
-     */
372
-    protected function rTrimLines(string &$html): void
373
-    {
374
-        $html = preg_replace('/\s+$/m', '', $html);
375
-    }
376
-
377
-    /**
378
-     * Convert newlines according to the current OS.
379
-     */
380
-    protected function convNlOs(string &$html): void
381
-    {
382
-        $html = preg_replace("(\r\n|\r)", $this->newline, $html);
383
-    }
384
-
385
-    /**
386
-     * Remove empty lines.
387
-     */
388
-    protected function removeEmptyLines(string &$html): void
389
-    {
390
-        $temp = explode($this->newline, $html);
391
-        $result = [];
392
-        for ($i = 0; $i < \count($temp); ++$i) {
393
-            if ('' == trim($temp[$i])) {
394
-                continue;
395
-            }
396
-            $result[] = $temp[$i];
397
-        }
398
-        $html = implode($this->newline, $result);
399
-    }
400
-
401
-    /**
402
-     * Include configured header comment in HTML content block.
403
-     */
404
-    public function includeHeaderComment(string &$html): void
405
-    {
406
-        $html = preg_replace('/^(-->)$/m', "\n\t".$this->headerComment."\n$1", $html);
407
-    }
20
+	/**
21
+	 * Enable Debug comment in footer.
22
+	 *
23
+	 * @var bool
24
+	 */
25
+	protected $debugComment = false;
26
+
27
+	/**
28
+	 * Format Type.
29
+	 *
30
+	 * @var int
31
+	 */
32
+	protected $formatType = 0;
33
+
34
+	/**
35
+	 * Tab character.
36
+	 *
37
+	 * @var string
38
+	 */
39
+	protected $tab = "\t";
40
+
41
+	/**
42
+	 * Newline character.
43
+	 *
44
+	 * @var string
45
+	 */
46
+	protected $newline = "\n";
47
+
48
+	/**
49
+	 * Configured extra header comment.
50
+	 *
51
+	 * @var string
52
+	 */
53
+	protected $headerComment = '';
54
+
55
+	/**
56
+	 * Empty space char.
57
+	 *
58
+	 * @var string
59
+	 */
60
+	protected $emptySpaceChar = ' ';
61
+
62
+	/**
63
+	 * Set variables based on given config.
64
+	 */
65
+	public function setVariables(array $config): void
66
+	{
67
+		if (!empty($config)) {
68
+			if ($config['formatHtml'] && is_numeric($config['formatHtml'])) {
69
+				$this->formatType = (int) $config['formatHtml'];
70
+			}
71
+
72
+			if ($config['formatHtml.']['tabSize'] && is_numeric($config['formatHtml.']['tabSize'])) {
73
+				$this->tab = str_pad('', (int) $config['formatHtml.']['tabSize'], ' ');
74
+			}
75
+
76
+			if (isset($config['formatHtml.']['debugComment'])) {
77
+				$this->debugComment = (bool) $config['formatHtml.']['debugComment'];
78
+			}
79
+
80
+			if (isset($config['headerComment'])) {
81
+				$this->headerComment = $config['headerComment'];
82
+			}
83
+
84
+			if (isset($config['dropEmptySpaceChar']) && (bool) $config['dropEmptySpaceChar']) {
85
+				$this->emptySpaceChar = '';
86
+			}
87
+		}
88
+	}
89
+
90
+	/**
91
+	 * Clean given HTML with formatter.
92
+	 *
93
+	 * @return string
94
+	 */
95
+	public function clean(string $html, array $config = [])
96
+	{
97
+		if (!empty($config)) {
98
+			$this->setVariables($config);
99
+		}
100
+		// convert line-breaks to UNIX
101
+		$this->convNlOs($html);
102
+
103
+		$manipulations = [];
104
+
105
+		if (isset($config['removeGenerator']) && (bool) $config['removeGenerator']) {
106
+			$manipulations['removeGenerator'] = GeneralUtility::makeInstance(RemoveGenerator::class);
107
+		}
108
+
109
+		if (isset($config['removeComments']) && (bool) $config['removeComments']) {
110
+			$manipulations['removeComments'] = GeneralUtility::makeInstance(RemoveComments::class);
111
+		}
112
+
113
+		if (!empty($this->headerComment)) {
114
+			$this->includeHeaderComment($html);
115
+		}
116
+
117
+		foreach ($manipulations as $key => $manipulation) {
118
+			/** @var ManipulationInterface $manipulation */
119
+			$configuration = isset($config[$key.'.']) && \is_array($config[$key.'.']) ? $config[$key.'.'] : [];
120
+			$html = $manipulation->manipulate($html, $configuration);
121
+		}
122
+
123
+		// cleanup HTML5 self-closing elements
124
+		if (!isset($GLOBALS['TSFE']->config['config']['doctype'])
125
+			|| 'x' !== substr($GLOBALS['TSFE']->config['config']['doctype'], 0, 1)) {
126
+			$html = preg_replace(
127
+				'/<((?:area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)\s[^>]+?)\s?\/>/',
128
+				'<$1>',
129
+				$html
130
+			);
131
+		}
132
+
133
+		if ($this->formatType > 0) {
134
+			$html = $this->formatHtml($html);
135
+		}
136
+		// remove white space after line ending
137
+		$this->rTrimLines($html);
138
+
139
+		// recover line-breaks
140
+		if (Environment::isWindows()) {
141
+			$html = str_replace($this->newline, "\r\n", $html);
142
+		}
143
+
144
+		return $html;
145
+	}
146
+
147
+	/**
148
+	 * Formats the (X)HTML code:
149
+	 *  - tabs according to the hierarchy of the tags
150
+	 *  - removes empty spaces between tags
151
+	 *  - removes linebreaks within tags (spares where necessary: pre, textarea, comments, ..)
152
+	 *  choose from five options:
153
+	 *    0 => off
154
+	 *    1 => no line break at all  (code in one line)
155
+	 *    2 => minimalistic line breaks (structure defining box-elements)
156
+	 *    3 => aesthetic line breaks (important box-elements)
157
+	 *    4 => logic line breaks (all box-elements)
158
+	 *    5 => max line breaks (all elements).
159
+	 */
160
+	protected function formatHtml(string $html):string
161
+	{
162
+		// Save original formatted pre, textarea, comments, styles and scripts & replace them with markers
163
+		preg_match_all(
164
+			'/(?s)((<!--.*?-->)|(<[ \n\r]*pre[^>]*>.*?<[ \n\r]*\/pre[^>]*>)|(<[ \n\r]*textarea[^>]*>.*?<[ \n\r]*\/textarea[^>]*>)|(<[ \n\r]*style[^>]*>.*?<[ \n\r]*\/style[^>]*>)|(<[ \n\r]*script[^>]*>.*?<[ \n\r]*\/script[^>]*>))/im',
165
+			$html,
166
+			$matches
167
+		);
168
+		$noFormat = $matches[0]; // do not format these block elements
169
+		for ($i = 0; $i < \count($noFormat); ++$i) {
170
+			$html = str_replace($noFormat[$i], "\n<!-- ELEMENT {$i} -->", $html);
171
+		}
172
+
173
+		// define box elements for formatting
174
+		$trueBoxElements = 'address|blockquote|center|dir|div|dl|fieldset|form|h1|h2|h3|h4|h5|h6|hr|isindex|menu|noframes|noscript|ol|p|pre|table|ul|article|aside|details|figcaption|figure|footer|header|hgroup|menu|nav|section';
175
+		$functionalBoxElements = 'dd|dt|frameset|li|tbody|td|tfoot|th|thead|tr|colgroup';
176
+		$usableBoxElements = 'applet|button|del|iframe|ins|map|object|script';
177
+		$imagineBoxElements = 'html|body|head|meta|title|link|script|base|!--';
178
+		$allBoxLikeElements = '(?>'.$trueBoxElements.'|'.$functionalBoxElements.'|'.$usableBoxElements.'|'.$imagineBoxElements.')';
179
+		$esteticBoxLikeElements = '(?>html|head|body|meta name|title|div|table|h1|h2|h3|h4|h5|h6|p|form|pre|center|!--)';
180
+		$structureBoxLikeElements = '(?>html|head|body|div|!--)';
181
+
182
+		// split html into its elements
183
+		$htmlArrayTemp = preg_split(
184
+			'/(<(?:[^<>]+(?:"[^"]*"|\'[^\']*\')?)+>)/',
185
+			$html,
186
+			-1,
187
+			\PREG_SPLIT_DELIM_CAPTURE | \PREG_SPLIT_NO_EMPTY
188
+		);
189
+
190
+		if (false === $htmlArrayTemp) {
191
+			// Restore saved comments, styles and scripts
192
+			for ($i = 0; $i < \count($noFormat); ++$i) {
193
+				$html = str_replace("<!-- ELEMENT {$i} -->", $noFormat[$i], $html);
194
+			}
195
+
196
+			return $html;
197
+		}
198
+		// remove empty lines
199
+		$htmlArray = [''];
200
+		$index = 1;
201
+		for ($x = 0; $x < \count($htmlArrayTemp); ++$x) {
202
+			$text = trim($htmlArrayTemp[$x]);
203
+			$htmlArray[$index] = '' !== $text ? $htmlArrayTemp[$x] : $this->emptySpaceChar;
204
+			++$index;
205
+		}
206
+
207
+		// rebuild html
208
+		$html = '';
209
+		$tabs = 0;
210
+		for ($x = 0; $x < \count($htmlArray); ++$x) {
211
+			$htmlArrayBefore = $htmlArray[$x - 1] ?? '';
212
+			$htmlArrayCurrent = $htmlArray[$x] ?? '';
213
+
214
+			// check if the element should stand in a new line
215
+			$newline = false;
216
+			if ('<?xml' == substr($htmlArrayBefore, 0, 5)) {
217
+				$newline = true;
218
+			} elseif (2 == $this->formatType && ( // minimalistic line break
219
+				// this element has a line break before itself
220
+				preg_match(
221
+					'/<'.$structureBoxLikeElements.'(.*)>/Usi',
222
+					$htmlArrayCurrent
223
+				) || preg_match(
224
+					'/<'.$structureBoxLikeElements.'(.*) \/>/Usi',
225
+					$htmlArrayCurrent
226
+				) // one element before is a element that has a line break after
227
+				|| preg_match(
228
+					'/<\/'.$structureBoxLikeElements.'(.*)>/Usi',
229
+					$htmlArrayBefore
230
+				) || '<!--' == substr(
231
+					$htmlArrayBefore,
232
+					0,
233
+					4
234
+				) || preg_match('/<'.$structureBoxLikeElements.'(.*) \/>/Usi', $htmlArrayBefore))
235
+			) {
236
+				$newline = true;
237
+			} elseif (3 == $this->formatType && ( // aesthetic line break
238
+				// this element has a line break before itself
239
+				preg_match(
240
+					'/<'.$esteticBoxLikeElements.'(.*)>/Usi',
241
+					$htmlArrayCurrent
242
+				) || preg_match(
243
+					'/<'.$esteticBoxLikeElements.'(.*) \/>/Usi',
244
+					$htmlArrayCurrent
245
+				) // one element before is a element that has a line break after
246
+				|| preg_match('/<\/'.$esteticBoxLikeElements.'(.*)>/Usi', $htmlArrayBefore) || '<!--' == substr(
247
+					$htmlArrayBefore,
248
+					0,
249
+					4
250
+				) || preg_match('/<'.$esteticBoxLikeElements.'(.*) \/>/Usi', $htmlArrayBefore))
251
+			) {
252
+				$newline = true;
253
+			} elseif ($this->formatType >= 4 && ( // logical line break
254
+				// this element has a line break before itself
255
+				preg_match(
256
+					'/<'.$allBoxLikeElements.'(.*)>/Usi',
257
+					$htmlArrayCurrent
258
+				) || preg_match(
259
+					'/<'.$allBoxLikeElements.'(.*) \/>/Usi',
260
+					$htmlArrayCurrent
261
+				) // one element before is a element that has a line break after
262
+				|| preg_match('/<\/'.$allBoxLikeElements.'(.*)>/Usi', $htmlArrayBefore) || '<!--' == substr(
263
+					$htmlArrayBefore,
264
+					0,
265
+					4
266
+				) || preg_match('/<'.$allBoxLikeElements.'(.*) \/>/Usi', $htmlArrayBefore))
267
+			) {
268
+				$newline = true;
269
+			}
270
+
271
+			// count down a tab
272
+			if ('</' == substr($htmlArrayCurrent, 0, 2)) {
273
+				--$tabs;
274
+			}
275
+
276
+			// add tabs and line breaks in front of the current tag
277
+			if ($newline) {
278
+				$html .= $this->newline;
279
+				for ($y = 0; $y < $tabs; ++$y) {
280
+					$html .= $this->tab;
281
+				}
282
+			}
283
+
284
+			// remove white spaces and line breaks and add current tag to the html-string
285
+			if ('<![CDATA[' == substr($htmlArrayCurrent, 0, 9) // remove multiple white space in CDATA / XML
286
+				|| '<?xml' == substr($htmlArrayCurrent, 0, 5)
287
+			) {
288
+				$html .= $this->killWhiteSpace($htmlArrayCurrent);
289
+			} else { // remove all line breaks
290
+				$html .= $this->killLineBreaks($htmlArrayCurrent);
291
+			}
292
+
293
+			// count up a tab
294
+			if ('<' == substr($htmlArrayCurrent, 0, 1) && '/' != substr($htmlArrayCurrent, 1, 1)) {
295
+				if (' ' !== substr($htmlArrayCurrent, 1, 1)
296
+					&& 'img' !== substr($htmlArrayCurrent, 1, 3)
297
+					&& 'source' !== substr($htmlArrayCurrent, 1, 6)
298
+					&& 'br' !== substr($htmlArrayCurrent, 1, 2)
299
+					&& 'hr' !== substr($htmlArrayCurrent, 1, 2)
300
+					&& 'input' !== substr($htmlArrayCurrent, 1, 5)
301
+					&& 'link' !== substr($htmlArrayCurrent, 1, 4)
302
+					&& 'meta' !== substr($htmlArrayCurrent, 1, 4)
303
+					&& 'col ' !== substr($htmlArrayCurrent, 1, 4)
304
+					&& 'frame' !== substr($htmlArrayCurrent, 1, 5)
305
+					&& 'isindex' !== substr($htmlArrayCurrent, 1, 7)
306
+					&& 'param' !== substr($htmlArrayCurrent, 1, 5)
307
+					&& 'area' !== substr($htmlArrayCurrent, 1, 4)
308
+					&& 'base' !== substr($htmlArrayCurrent, 1, 4)
309
+					&& '<!' !== substr($htmlArrayCurrent, 0, 2)
310
+					&& '<?xml' !== substr($htmlArrayCurrent, 0, 5)
311
+				) {
312
+					++$tabs;
313
+				}
314
+			}
315
+		}
316
+
317
+		// Remove empty lines
318
+		if ($this->formatType > 1) {
319
+			$this->removeEmptyLines($html);
320
+		}
321
+
322
+		// Restore saved comments, styles and scripts
323
+		for ($i = 0; $i < \count($noFormat); ++$i) {
324
+			$html = str_replace("<!-- ELEMENT {$i} -->", $noFormat[$i], $html);
325
+		}
326
+
327
+		// include debug comment at the end
328
+		if (0 != $tabs && true === $this->debugComment) {
329
+			$html .= "<!-- {$tabs} open elements found -->";
330
+		}
331
+
332
+		return $html;
333
+	}
334
+
335
+	/**
336
+	 * Remove ALL line breaks and multiple white space.
337
+	 *
338
+	 * @param string $html
339
+	 *
340
+	 * @return string
341
+	 */
342
+	protected function killLineBreaks($html)
343
+	{
344
+		$html = str_replace($this->newline, '', $html);
345
+
346
+		return preg_replace('/\s\s+/u', ' ', $html);
347
+		// ? return preg_replace('/\n|\s+(\s)/u', '$1', $html);
348
+	}
349
+
350
+	/**
351
+	 * Remove multiple white space, keeps line breaks.
352
+	 */
353
+	protected function killWhiteSpace(string $html): string
354
+	{
355
+		$temp = explode($this->newline, $html);
356
+		for ($i = 0; $i < \count($temp); ++$i) {
357
+			if (!trim($temp[$i])) {
358
+				unset($temp[$i]);
359
+				continue;
360
+			}
361
+
362
+			$temp[$i] = trim($temp[$i]);
363
+			$temp[$i] = preg_replace('/\s\s+/', ' ', $temp[$i]);
364
+		}
365
+
366
+		return implode($this->newline, $temp);
367
+	}
368
+
369
+	/**
370
+	 * Remove white space at the end of lines, keeps other white space and line breaks.
371
+	 */
372
+	protected function rTrimLines(string &$html): void
373
+	{
374
+		$html = preg_replace('/\s+$/m', '', $html);
375
+	}
376
+
377
+	/**
378
+	 * Convert newlines according to the current OS.
379
+	 */
380
+	protected function convNlOs(string &$html): void
381
+	{
382
+		$html = preg_replace("(\r\n|\r)", $this->newline, $html);
383
+	}
384
+
385
+	/**
386
+	 * Remove empty lines.
387
+	 */
388
+	protected function removeEmptyLines(string &$html): void
389
+	{
390
+		$temp = explode($this->newline, $html);
391
+		$result = [];
392
+		for ($i = 0; $i < \count($temp); ++$i) {
393
+			if ('' == trim($temp[$i])) {
394
+				continue;
395
+			}
396
+			$result[] = $temp[$i];
397
+		}
398
+		$html = implode($this->newline, $result);
399
+	}
400
+
401
+	/**
402
+	 * Include configured header comment in HTML content block.
403
+	 */
404
+	public function includeHeaderComment(string &$html): void
405
+	{
406
+		$html = preg_replace('/^(-->)$/m', "\n\t".$this->headerComment."\n$1", $html);
407
+	}
408 408
 }
Please login to merge, or discard this patch.
Classes/Middleware/CleanHtmlMiddleware.php 1 patch
Indentation   +35 added lines, -35 removed lines patch added patch discarded remove patch
@@ -19,39 +19,39 @@
 block discarded – undo
19 19
  */
20 20
 class CleanHtmlMiddleware implements MiddlewareInterface
21 21
 {
22
-    /**
23
-     * @var CleanHtmlService
24
-     */
25
-    protected $cleanHtmlService;
26
-
27
-    public function __construct()
28
-    {
29
-        $this->cleanHtmlService = GeneralUtility::makeInstance(CleanHtmlService::class);
30
-    }
31
-
32
-    /**
33
-     * Clean the HTML output.
34
-     */
35
-    public function process(ServerRequestInterface $request, RequestHandlerInterface $handler): ResponseInterface
36
-    {
37
-        $response = $handler->handle($request);
38
-
39
-        if (!($response instanceof NullResponse)
40
-        && $GLOBALS['TSFE'] instanceof TypoScriptFrontendController
41
-        && ($GLOBALS['TSFE']->config['config']['sourceopt.']['enabled'] ?? false)
42
-        && 'text/html' == substr($response->getHeaderLine('Content-Type'), 0, 9)
43
-        ) {
44
-            $processedHtml = $this->cleanHtmlService->clean(
45
-                $response->getBody()->__toString(),
46
-                (array) $GLOBALS['TSFE']->config['config']['sourceopt.']
47
-            );
48
-
49
-            // Replace old body with $processedHtml
50
-            $responseBody = new Stream('php://temp', 'rw');
51
-            $responseBody->write($processedHtml);
52
-            $response = $response->withBody($responseBody);
53
-        }
54
-
55
-        return $response;
56
-    }
22
+	/**
23
+	 * @var CleanHtmlService
24
+	 */
25
+	protected $cleanHtmlService;
26
+
27
+	public function __construct()
28
+	{
29
+		$this->cleanHtmlService = GeneralUtility::makeInstance(CleanHtmlService::class);
30
+	}
31
+
32
+	/**
33
+	 * Clean the HTML output.
34
+	 */
35
+	public function process(ServerRequestInterface $request, RequestHandlerInterface $handler): ResponseInterface
36
+	{
37
+		$response = $handler->handle($request);
38
+
39
+		if (!($response instanceof NullResponse)
40
+		&& $GLOBALS['TSFE'] instanceof TypoScriptFrontendController
41
+		&& ($GLOBALS['TSFE']->config['config']['sourceopt.']['enabled'] ?? false)
42
+		&& 'text/html' == substr($response->getHeaderLine('Content-Type'), 0, 9)
43
+		) {
44
+			$processedHtml = $this->cleanHtmlService->clean(
45
+				$response->getBody()->__toString(),
46
+				(array) $GLOBALS['TSFE']->config['config']['sourceopt.']
47
+			);
48
+
49
+			// Replace old body with $processedHtml
50
+			$responseBody = new Stream('php://temp', 'rw');
51
+			$responseBody->write($processedHtml);
52
+			$response = $response->withBody($responseBody);
53
+		}
54
+
55
+		return $response;
56
+	}
57 57
 }
Please login to merge, or discard this patch.