Completed
Push — master ( 29f062...05bf68 )
by Tim
22s queued 16s
created
Classes/Middleware/CleanHtmlMiddleware.php 1 patch
Indentation   +36 added lines, -36 removed lines patch added patch discarded remove patch
@@ -17,40 +17,40 @@
 block discarded – undo
17 17
  */
18 18
 class CleanHtmlMiddleware implements MiddlewareInterface
19 19
 {
20
-    /**
21
-     * @var CleanHtmlService
22
-     */
23
-    protected $cleanHtmlService = null;
24
-
25
-    public function __construct()
26
-    {
27
-        $this->cleanHtmlService = GeneralUtility::makeInstance(CleanHtmlService::class);
28
-    }
29
-
30
-    /**
31
-     * Clean the HTML output
32
-     *
33
-     * @param ServerRequestInterface $request
34
-     * @param RequestHandlerInterface $handler
35
-     * @return ResponseInterface
36
-     */
37
-    public function process(ServerRequestInterface $request, RequestHandlerInterface $handler): ResponseInterface
38
-    {
39
-        $response = $handler->handle($request);
40
-
41
-        if (!($response instanceof NullResponse)
42
-            && $GLOBALS['TSFE'] instanceof TypoScriptFrontendController) {
43
-            $processedHtml = $this->cleanHtmlService->clean(
44
-                $response->getBody()->__toString(),
45
-                $GLOBALS['TSFE']->config['config']['sourceopt.']
46
-            );
47
-
48
-            // Replace old body with $processedHtml
49
-            $responseBody = new Stream('php://temp', 'rw');
50
-            $responseBody->write($processedHtml);
51
-            $response = $response->withBody($responseBody);
52
-        }
53
-
54
-        return $response;
55
-    }
20
+	/**
21
+	 * @var CleanHtmlService
22
+	 */
23
+	protected $cleanHtmlService = null;
24
+
25
+	public function __construct()
26
+	{
27
+		$this->cleanHtmlService = GeneralUtility::makeInstance(CleanHtmlService::class);
28
+	}
29
+
30
+	/**
31
+	 * Clean the HTML output
32
+	 *
33
+	 * @param ServerRequestInterface $request
34
+	 * @param RequestHandlerInterface $handler
35
+	 * @return ResponseInterface
36
+	 */
37
+	public function process(ServerRequestInterface $request, RequestHandlerInterface $handler): ResponseInterface
38
+	{
39
+		$response = $handler->handle($request);
40
+
41
+		if (!($response instanceof NullResponse)
42
+			&& $GLOBALS['TSFE'] instanceof TypoScriptFrontendController) {
43
+			$processedHtml = $this->cleanHtmlService->clean(
44
+				$response->getBody()->__toString(),
45
+				$GLOBALS['TSFE']->config['config']['sourceopt.']
46
+			);
47
+
48
+			// Replace old body with $processedHtml
49
+			$responseBody = new Stream('php://temp', 'rw');
50
+			$responseBody->write($processedHtml);
51
+			$response = $response->withBody($responseBody);
52
+		}
53
+
54
+		return $response;
55
+	}
56 56
 }
Please login to merge, or discard this patch.
Classes/Service/CleanHtmlService.php 2 patches
Indentation   +416 added lines, -416 removed lines patch added patch discarded remove patch
@@ -16,420 +16,420 @@
 block discarded – undo
16 16
 class CleanHtmlService implements SingletonInterface
17 17
 {
18 18
 
19
-    /**
20
-     * Enable Debug comment in footer
21
-     *
22
-     * @var boolean
23
-     */
24
-    protected $debugComment = false;
25
-
26
-    /**
27
-     * Format Type
28
-     *
29
-     * @var integer
30
-     */
31
-    protected $formatType = 0;
32
-
33
-    /**
34
-     * Tab character
35
-     *
36
-     * @var string
37
-     */
38
-    protected $tab = "\t";
39
-
40
-    /**
41
-     * Newline character
42
-     *
43
-     * @var string
44
-     */
45
-    protected $newline = "\n";
46
-
47
-    /**
48
-     * Configured extra header comment
49
-     *
50
-     * @var string
51
-     */
52
-    protected $headerComment = '';
53
-
54
-    /**
55
-     * Empty space char
56
-     * @var string
57
-     */
58
-    protected $emptySpaceChar = ' ';
59
-
60
-    /**
61
-     * Set variables based on given config
62
-     *
63
-     * @param array $config
64
-     *
65
-     * @return void
66
-     */
67
-    public function setVariables(array $config)
68
-    {
69
-        if (!empty($config)) {
70
-            if ($config['formatHtml'] && is_numeric($config['formatHtml'])) {
71
-                $this->formatType = (int)$config['formatHtml'];
72
-            }
73
-
74
-            if ($config['formatHtml.']['tabSize'] && is_numeric($config['formatHtml.']['tabSize'])) {
75
-                $this->tab = str_pad('', $config['formatHtml.']['tabSize'], ' ');
76
-            }
77
-
78
-            if (isset($config['formatHtml.']['debugComment'])) {
79
-                $this->debugComment = (bool)$config['formatHtml.']['debugComment'];
80
-            }
81
-
82
-            if (isset($config['headerComment'])) {
83
-                $this->headerComment = $config['headerComment'];
84
-            }
85
-
86
-            if (isset($config['dropEmptySpaceChar']) && (bool)$config['dropEmptySpaceChar']) {
87
-                $this->emptySpaceChar = '';
88
-            }
89
-        }
90
-    }
91
-
92
-    /**
93
-     * Clean given HTML with formatter
94
-     *
95
-     * @param string $html
96
-     * @param array $config
97
-     *
98
-     * @return string
99
-     */
100
-    public function clean($html, $config = [])
101
-    {
102
-        if (!empty($config)) {
103
-            if ((bool)$config['enabled'] === false) {
104
-                return $html;
105
-            }
106
-
107
-            $this->setVariables($config);
108
-        }
109
-        // convert line-breaks to UNIX
110
-        $this->convNlOs($html);
111
-
112
-        $manipulations = [];
113
-
114
-        if (isset($config['removeGenerator']) && (bool)$config['removeGenerator']) {
115
-            $manipulations['removeGenerator'] = GeneralUtility::makeInstance(RemoveGenerator::class);
116
-        }
117
-
118
-        if (isset($config['removeComments']) && (bool)$config['removeComments']) {
119
-            $manipulations['removeComments'] = GeneralUtility::makeInstance(RemoveComments::class);
120
-        }
121
-
122
-        if (!empty($this->headerComment)) {
123
-            $this->includeHeaderComment($html);
124
-        }
125
-
126
-        foreach ($manipulations as $key => $manipulation) {
127
-            /** @var ManipulationInterface $manipulation */
128
-            $configuration = isset($config[$key . '.']) && is_array($config[$key . '.']) ? $config[$key . '.'] : [];
129
-            $html = $manipulation->manipulate($html, $configuration);
130
-        }
131
-
132
-        // cleanup HTML5 self-closing elements
133
-        if (!isset($GLOBALS['TSFE']->config['config']['doctype']) ||
134
-            'x' !== substr($GLOBALS['TSFE']->config['config']['doctype'], 0, 1)) {
135
-            $html = preg_replace(
136
-                '/<((?:area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)\s[^>]+?)\s?\/>/',
137
-                '<$1>',
138
-                $html
139
-            );
140
-        }
141
-
142
-        if ($this->formatType > 0) {
143
-            $html = $this->formatHtml($html);
144
-        }
145
-        // remove white space after line ending
146
-        $this->rTrimLines($html);
147
-
148
-        // recover line-breaks
149
-        if (Environment::isWindows()) {
150
-            $html = str_replace($this->newline, "\r\n", $html);
151
-        }
152
-
153
-        return $html;
154
-    }
155
-
156
-    /**
157
-     * Formats the (X)HTML code:
158
-     *  - taps according to the hirarchy of the tags
159
-     *  - removes empty spaces between tags
160
-     *  - removes linebreaks within tags (spares where necessary: pre, textarea, comments, ..)
161
-     *  choose from five options:
162
-     *    0 => off
163
-     *    1 => no line break at all  (code in one line)
164
-     *    2 => minimalistic line breaks (structure defining box-elements)
165
-     *    3 => aesthetic line breaks (important box-elements)
166
-     *    4 => logic line breaks (all box-elements)
167
-     *    5 => max line breaks (all elements)
168
-     *
169
-     * @param string $html
170
-     *
171
-     * @return string
172
-     */
173
-    protected function formatHtml($html)
174
-    {
175
-        // Save original formated comments, pre, textarea, styles and java-scripts & replace them with markers
176
-        preg_match_all(
177
-            '/(?s)((<!--.*?-->)|(<[ \n\r]*pre[^>]*>.*?<[ \n\r]*\/pre[^>]*>)|(<[ \n\r]*textarea[^>]*>.*?<[ \n\r]*\/textarea[^>]*>)|(<[ \n\r]*style[^>]*>.*?<[ \n\r]*\/style[^>]*>)|(<[ \n\r]*script[^>]*>.*?<[ \n\r]*\/script[^>]*>))/im',
178
-            $html,
179
-            $matches
180
-        );
181
-        $noFormat = $matches[0]; // do not format these block elements
182
-        for ($i = 0; $i < count($noFormat); $i++) {
183
-            $html = str_replace($noFormat[$i], "\n<!-- ELEMENT $i -->", $html);
184
-        }
185
-
186
-        // define box elements for formatting
187
-        $trueBoxElements = 'address|blockquote|center|dir|div|dl|fieldset|form|h1|h2|h3|h4|h5|h6|hr|isindex|menu|noframes|noscript|ol|p|pre|table|ul|article|aside|details|figcaption|figure|footer|header|hgroup|menu|nav|section';
188
-        $functionalBoxElements = 'dd|dt|frameset|li|tbody|td|tfoot|th|thead|tr|colgroup';
189
-        $usableBoxElements = 'applet|button|del|iframe|ins|map|object|script';
190
-        $imagineBoxElements = 'html|body|head|meta|title|link|script|base|!--';
191
-        $allBoxLikeElements = '(?>' . $trueBoxElements . '|' . $functionalBoxElements . '|' . $usableBoxElements . '|' . $imagineBoxElements . ')';
192
-        $esteticBoxLikeElements = '(?>html|head|body|meta name|title|div|table|h1|h2|h3|h4|h5|h6|p|form|pre|center|!--)';
193
-        $structureBoxLikeElements = '(?>html|head|body|div|!--)';
194
-
195
-        // split html into it's elements
196
-        $htmlArrayTemp = preg_split(
197
-            '/(<(?:[^<>]+(?:"[^"]*"|\'[^\']*\')?)+>)/',
198
-            $html,
199
-            -1,
200
-            PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY
201
-        );
202
-
203
-        if ($htmlArrayTemp === false) {
204
-            // Restore saved comments, styles and java-scripts
205
-            for ($i = 0; $i < count($noFormat); $i++) {
206
-                $html = str_replace("<!-- ELEMENT $i -->", $noFormat[$i], $html);
207
-            }
208
-            return $html;
209
-        }
210
-        // remove empty lines
211
-        $htmlArray = [''];
212
-        $index = 1;
213
-        for ($x = 0; $x < count($htmlArrayTemp); $x++) {
214
-            $text = trim($htmlArrayTemp[$x]);
215
-            $htmlArray[$index] = $text !== '' ? $htmlArrayTemp[$x] : $this->emptySpaceChar;
216
-            $index++;
217
-        }
218
-
219
-        // rebuild html
220
-        $html = '';
221
-        $tabs = 0;
222
-        for ($x = 0; $x < count($htmlArray); $x++) {
223
-            // check if the element should stand in a new line
224
-            $newline = false;
225
-            if (substr($htmlArray[$x - 1], 0, 5) == '<?xml') {
226
-                $newline = true;
227
-            } elseif ($this->formatType == 2 && ( // minimalistic line break
228
-                    # this element has a line break before itself
229
-                    preg_match(
230
-                        '/<' . $structureBoxLikeElements . '(.*)>/Usi',
231
-                        $htmlArray[$x]
232
-                    ) || preg_match(
233
-                        '/<' . $structureBoxLikeElements . '(.*) \/>/Usi',
234
-                        $htmlArray[$x]
235
-                    ) || # one element before is a element that has a line break after
236
-                    preg_match(
237
-                        '/<\/' . $structureBoxLikeElements . '(.*)>/Usi',
238
-                        $htmlArray[$x - 1]
239
-                    ) || substr(
240
-                        $htmlArray[$x - 1],
241
-                        0,
242
-                        4
243
-                    ) == '<!--' || preg_match('/<' . $structureBoxLikeElements . '(.*) \/>/Usi', $htmlArray[$x - 1]))
244
-            ) {
245
-                $newline = true;
246
-            } elseif ($this->formatType == 3 && ( // aestetic line break
247
-                    # this element has a line break before itself
248
-                    preg_match(
249
-                        '/<' . $esteticBoxLikeElements . '(.*)>/Usi',
250
-                        $htmlArray[$x]
251
-                    ) || preg_match(
252
-                        '/<' . $esteticBoxLikeElements . '(.*) \/>/Usi',
253
-                        $htmlArray[$x]
254
-                    ) || # one element before is a element that has a line break after
255
-                    preg_match('/<\/' . $esteticBoxLikeElements . '(.*)>/Usi', $htmlArray[$x - 1]) || substr(
256
-                        $htmlArray[$x - 1],
257
-                        0,
258
-                        4
259
-                    ) == '<!--' || preg_match('/<' . $esteticBoxLikeElements . '(.*) \/>/Usi', $htmlArray[$x - 1]))
260
-            ) {
261
-                $newline = true;
262
-            } elseif ($this->formatType >= 4 && ( // logical line break
263
-                    # this element has a line break before itself
264
-                    preg_match(
265
-                        '/<' . $allBoxLikeElements . '(.*)>/Usi',
266
-                        $htmlArray[$x]
267
-                    ) || preg_match(
268
-                        '/<' . $allBoxLikeElements . '(.*) \/>/Usi',
269
-                        $htmlArray[$x]
270
-                    ) || # one element before is a element that has a line break after
271
-                    preg_match('/<\/' . $allBoxLikeElements . '(.*)>/Usi', $htmlArray[$x - 1]) || substr(
272
-                        $htmlArray[$x - 1],
273
-                        0,
274
-                        4
275
-                    ) == '<!--' || preg_match('/<' . $allBoxLikeElements . '(.*) \/>/Usi', $htmlArray[$x - 1]))
276
-            ) {
277
-                $newline = true;
278
-            }
279
-
280
-            // count down a tab
281
-            if (substr($htmlArray[$x], 0, 2) == '</') {
282
-                $tabs--;
283
-            }
284
-
285
-            // add tabs and line breaks in front of the current tag
286
-            if ($newline) {
287
-                $html .= $this->newline;
288
-                for ($y = 0; $y < $tabs; $y++) {
289
-                    $html .= $this->tab;
290
-                }
291
-            }
292
-
293
-            // remove white spaces and line breaks and add current tag to the html-string
294
-            if (substr($htmlArray[$x], 0, 9) == '<![CDATA[' // remove multiple white space in CDATA / XML
295
-                || substr($htmlArray[$x], 0, 5) == '<?xml'
296
-            ) {
297
-                $html .= $this->killWhiteSpace($htmlArray[$x]);
298
-            } else { // remove all line breaks
299
-                $html .= $this->killLineBreaks($htmlArray[$x]);
300
-            }
301
-
302
-            // count up a tab
303
-            if (substr($htmlArray[$x], 0, 1) == '<' && substr($htmlArray[$x], 1, 1) != '/') {
304
-                if (substr($htmlArray[$x], 1, 1) !== ' '
305
-                    && substr($htmlArray[$x], 1, 3) !== 'img'
306
-                    && substr($htmlArray[$x], 1, 6) !== 'source'
307
-                    && substr($htmlArray[$x], 1, 2) !== 'br'
308
-                    && substr($htmlArray[$x], 1, 2) !== 'hr'
309
-                    && substr($htmlArray[$x], 1, 5) !== 'input'
310
-                    && substr($htmlArray[$x], 1, 4) !== 'link'
311
-                    && substr($htmlArray[$x], 1, 4) !== 'meta'
312
-                    && substr($htmlArray[$x], 1, 4) !== 'col '
313
-                    && substr($htmlArray[$x], 1, 5) !== 'frame'
314
-                    && substr($htmlArray[$x], 1, 7) !== 'isindex'
315
-                    && substr($htmlArray[$x], 1, 5) !== 'param'
316
-                    && substr($htmlArray[$x], 1, 4) !== 'area'
317
-                    && substr($htmlArray[$x], 1, 4) !== 'base'
318
-                    && substr($htmlArray[$x], 0, 2) !== '<!'
319
-                    && substr($htmlArray[$x], 0, 5) !== '<?xml'
320
-                ) {
321
-                    $tabs++;
322
-                }
323
-            }
324
-        }
325
-
326
-        // Remove empty lines
327
-        if ($this->formatType > 1) {
328
-            $this->removeEmptyLines($html);
329
-        }
330
-
331
-        // Restore saved comments, styles and java-scripts
332
-        for ($i = 0; $i < count($noFormat); $i++) {
333
-            $html = str_replace("<!-- ELEMENT $i -->", $noFormat[$i], $html);
334
-        }
335
-
336
-        // include debug comment at the end
337
-        if ($tabs != 0 && $this->debugComment === true) {
338
-            $html .= "<!-- $tabs open elements found -->";
339
-        }
340
-
341
-        return $html;
342
-    }
343
-
344
-    /**
345
-     * Remove ALL line breaks and multiple white space
346
-     *
347
-     * @param string $html
348
-     *
349
-     * @return string
350
-     */
351
-    protected function killLineBreaks($html)
352
-    {
353
-        $html = str_replace($this->newline, '', $html);
354
-        $html = preg_replace('/\s\s+/u', ' ', $html);
355
-        return $html;
356
-        #? return preg_replace('/\n|\s+(\s)/u', '$1', $html);
357
-    }
358
-
359
-    /**
360
-     * Remove multiple white space, keeps line breaks
361
-     *
362
-     * @param string $html
363
-     *
364
-     * @return string
365
-     */
366
-    protected function killWhiteSpace($html)
367
-    {
368
-        $temp = explode($this->newline, $html);
369
-        for ($i = 0; $i < count($temp); $i++) {
370
-            if (!trim($temp[$i])) {
371
-                unset($temp[$i]);
372
-                continue;
373
-            }
374
-
375
-            $temp[$i] = trim($temp[$i]);
376
-            $temp[$i] = preg_replace('/\s\s+/', ' ', $temp[$i]);
377
-        }
378
-        $html = implode($this->newline, $temp);
379
-        return $html;
380
-    }
381
-
382
-    /**
383
-     * Remove white space at the end of lines, keeps other white space and line breaks
384
-     *
385
-     * @param string $html
386
-     *
387
-     * @return string
388
-     */
389
-    protected function rTrimLines(&$html)
390
-    {
391
-        $html = preg_replace('/\s+$/m', '', $html);
392
-    }
393
-
394
-    /**
395
-     * Convert newlines according to the current OS
396
-     *
397
-     * @param string $html
398
-     *
399
-     * @return string
400
-     */
401
-    protected function convNlOs(&$html)
402
-    {
403
-        $html = preg_replace("(\r\n|\r)", $this->newline, $html);
404
-    }
405
-
406
-    /**
407
-     * Remove empty lines
408
-     *
409
-     * @param string $html
410
-     *
411
-     * @return void
412
-     */
413
-    protected function removeEmptyLines(&$html)
414
-    {
415
-        $temp = explode($this->newline, $html);
416
-        $result = [];
417
-        for ($i = 0; $i < count($temp); ++$i) {
418
-            if ('' == trim($temp[$i])) {
419
-                continue;
420
-            }
421
-            $result[] = $temp[$i];
422
-        }
423
-        $html = implode($this->newline, $result);
424
-    }
425
-
426
-    /**
427
-     * Include configured header comment in HTML content block
428
-     *
429
-     * @param $html
430
-     */
431
-    public function includeHeaderComment(&$html)
432
-    {
433
-        $html = preg_replace('/^(-->)$/m', "\n\t" . $this->headerComment . "\n$1", $html);
434
-    }
19
+	/**
20
+	 * Enable Debug comment in footer
21
+	 *
22
+	 * @var boolean
23
+	 */
24
+	protected $debugComment = false;
25
+
26
+	/**
27
+	 * Format Type
28
+	 *
29
+	 * @var integer
30
+	 */
31
+	protected $formatType = 0;
32
+
33
+	/**
34
+	 * Tab character
35
+	 *
36
+	 * @var string
37
+	 */
38
+	protected $tab = "\t";
39
+
40
+	/**
41
+	 * Newline character
42
+	 *
43
+	 * @var string
44
+	 */
45
+	protected $newline = "\n";
46
+
47
+	/**
48
+	 * Configured extra header comment
49
+	 *
50
+	 * @var string
51
+	 */
52
+	protected $headerComment = '';
53
+
54
+	/**
55
+	 * Empty space char
56
+	 * @var string
57
+	 */
58
+	protected $emptySpaceChar = ' ';
59
+
60
+	/**
61
+	 * Set variables based on given config
62
+	 *
63
+	 * @param array $config
64
+	 *
65
+	 * @return void
66
+	 */
67
+	public function setVariables(array $config)
68
+	{
69
+		if (!empty($config)) {
70
+			if ($config['formatHtml'] && is_numeric($config['formatHtml'])) {
71
+				$this->formatType = (int)$config['formatHtml'];
72
+			}
73
+
74
+			if ($config['formatHtml.']['tabSize'] && is_numeric($config['formatHtml.']['tabSize'])) {
75
+				$this->tab = str_pad('', $config['formatHtml.']['tabSize'], ' ');
76
+			}
77
+
78
+			if (isset($config['formatHtml.']['debugComment'])) {
79
+				$this->debugComment = (bool)$config['formatHtml.']['debugComment'];
80
+			}
81
+
82
+			if (isset($config['headerComment'])) {
83
+				$this->headerComment = $config['headerComment'];
84
+			}
85
+
86
+			if (isset($config['dropEmptySpaceChar']) && (bool)$config['dropEmptySpaceChar']) {
87
+				$this->emptySpaceChar = '';
88
+			}
89
+		}
90
+	}
91
+
92
+	/**
93
+	 * Clean given HTML with formatter
94
+	 *
95
+	 * @param string $html
96
+	 * @param array $config
97
+	 *
98
+	 * @return string
99
+	 */
100
+	public function clean($html, $config = [])
101
+	{
102
+		if (!empty($config)) {
103
+			if ((bool)$config['enabled'] === false) {
104
+				return $html;
105
+			}
106
+
107
+			$this->setVariables($config);
108
+		}
109
+		// convert line-breaks to UNIX
110
+		$this->convNlOs($html);
111
+
112
+		$manipulations = [];
113
+
114
+		if (isset($config['removeGenerator']) && (bool)$config['removeGenerator']) {
115
+			$manipulations['removeGenerator'] = GeneralUtility::makeInstance(RemoveGenerator::class);
116
+		}
117
+
118
+		if (isset($config['removeComments']) && (bool)$config['removeComments']) {
119
+			$manipulations['removeComments'] = GeneralUtility::makeInstance(RemoveComments::class);
120
+		}
121
+
122
+		if (!empty($this->headerComment)) {
123
+			$this->includeHeaderComment($html);
124
+		}
125
+
126
+		foreach ($manipulations as $key => $manipulation) {
127
+			/** @var ManipulationInterface $manipulation */
128
+			$configuration = isset($config[$key . '.']) && is_array($config[$key . '.']) ? $config[$key . '.'] : [];
129
+			$html = $manipulation->manipulate($html, $configuration);
130
+		}
131
+
132
+		// cleanup HTML5 self-closing elements
133
+		if (!isset($GLOBALS['TSFE']->config['config']['doctype']) ||
134
+			'x' !== substr($GLOBALS['TSFE']->config['config']['doctype'], 0, 1)) {
135
+			$html = preg_replace(
136
+				'/<((?:area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)\s[^>]+?)\s?\/>/',
137
+				'<$1>',
138
+				$html
139
+			);
140
+		}
141
+
142
+		if ($this->formatType > 0) {
143
+			$html = $this->formatHtml($html);
144
+		}
145
+		// remove white space after line ending
146
+		$this->rTrimLines($html);
147
+
148
+		// recover line-breaks
149
+		if (Environment::isWindows()) {
150
+			$html = str_replace($this->newline, "\r\n", $html);
151
+		}
152
+
153
+		return $html;
154
+	}
155
+
156
+	/**
157
+	 * Formats the (X)HTML code:
158
+	 *  - taps according to the hirarchy of the tags
159
+	 *  - removes empty spaces between tags
160
+	 *  - removes linebreaks within tags (spares where necessary: pre, textarea, comments, ..)
161
+	 *  choose from five options:
162
+	 *    0 => off
163
+	 *    1 => no line break at all  (code in one line)
164
+	 *    2 => minimalistic line breaks (structure defining box-elements)
165
+	 *    3 => aesthetic line breaks (important box-elements)
166
+	 *    4 => logic line breaks (all box-elements)
167
+	 *    5 => max line breaks (all elements)
168
+	 *
169
+	 * @param string $html
170
+	 *
171
+	 * @return string
172
+	 */
173
+	protected function formatHtml($html)
174
+	{
175
+		// Save original formated comments, pre, textarea, styles and java-scripts & replace them with markers
176
+		preg_match_all(
177
+			'/(?s)((<!--.*?-->)|(<[ \n\r]*pre[^>]*>.*?<[ \n\r]*\/pre[^>]*>)|(<[ \n\r]*textarea[^>]*>.*?<[ \n\r]*\/textarea[^>]*>)|(<[ \n\r]*style[^>]*>.*?<[ \n\r]*\/style[^>]*>)|(<[ \n\r]*script[^>]*>.*?<[ \n\r]*\/script[^>]*>))/im',
178
+			$html,
179
+			$matches
180
+		);
181
+		$noFormat = $matches[0]; // do not format these block elements
182
+		for ($i = 0; $i < count($noFormat); $i++) {
183
+			$html = str_replace($noFormat[$i], "\n<!-- ELEMENT $i -->", $html);
184
+		}
185
+
186
+		// define box elements for formatting
187
+		$trueBoxElements = 'address|blockquote|center|dir|div|dl|fieldset|form|h1|h2|h3|h4|h5|h6|hr|isindex|menu|noframes|noscript|ol|p|pre|table|ul|article|aside|details|figcaption|figure|footer|header|hgroup|menu|nav|section';
188
+		$functionalBoxElements = 'dd|dt|frameset|li|tbody|td|tfoot|th|thead|tr|colgroup';
189
+		$usableBoxElements = 'applet|button|del|iframe|ins|map|object|script';
190
+		$imagineBoxElements = 'html|body|head|meta|title|link|script|base|!--';
191
+		$allBoxLikeElements = '(?>' . $trueBoxElements . '|' . $functionalBoxElements . '|' . $usableBoxElements . '|' . $imagineBoxElements . ')';
192
+		$esteticBoxLikeElements = '(?>html|head|body|meta name|title|div|table|h1|h2|h3|h4|h5|h6|p|form|pre|center|!--)';
193
+		$structureBoxLikeElements = '(?>html|head|body|div|!--)';
194
+
195
+		// split html into it's elements
196
+		$htmlArrayTemp = preg_split(
197
+			'/(<(?:[^<>]+(?:"[^"]*"|\'[^\']*\')?)+>)/',
198
+			$html,
199
+			-1,
200
+			PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY
201
+		);
202
+
203
+		if ($htmlArrayTemp === false) {
204
+			// Restore saved comments, styles and java-scripts
205
+			for ($i = 0; $i < count($noFormat); $i++) {
206
+				$html = str_replace("<!-- ELEMENT $i -->", $noFormat[$i], $html);
207
+			}
208
+			return $html;
209
+		}
210
+		// remove empty lines
211
+		$htmlArray = [''];
212
+		$index = 1;
213
+		for ($x = 0; $x < count($htmlArrayTemp); $x++) {
214
+			$text = trim($htmlArrayTemp[$x]);
215
+			$htmlArray[$index] = $text !== '' ? $htmlArrayTemp[$x] : $this->emptySpaceChar;
216
+			$index++;
217
+		}
218
+
219
+		// rebuild html
220
+		$html = '';
221
+		$tabs = 0;
222
+		for ($x = 0; $x < count($htmlArray); $x++) {
223
+			// check if the element should stand in a new line
224
+			$newline = false;
225
+			if (substr($htmlArray[$x - 1], 0, 5) == '<?xml') {
226
+				$newline = true;
227
+			} elseif ($this->formatType == 2 && ( // minimalistic line break
228
+					# this element has a line break before itself
229
+					preg_match(
230
+						'/<' . $structureBoxLikeElements . '(.*)>/Usi',
231
+						$htmlArray[$x]
232
+					) || preg_match(
233
+						'/<' . $structureBoxLikeElements . '(.*) \/>/Usi',
234
+						$htmlArray[$x]
235
+					) || # one element before is a element that has a line break after
236
+					preg_match(
237
+						'/<\/' . $structureBoxLikeElements . '(.*)>/Usi',
238
+						$htmlArray[$x - 1]
239
+					) || substr(
240
+						$htmlArray[$x - 1],
241
+						0,
242
+						4
243
+					) == '<!--' || preg_match('/<' . $structureBoxLikeElements . '(.*) \/>/Usi', $htmlArray[$x - 1]))
244
+			) {
245
+				$newline = true;
246
+			} elseif ($this->formatType == 3 && ( // aestetic line break
247
+					# this element has a line break before itself
248
+					preg_match(
249
+						'/<' . $esteticBoxLikeElements . '(.*)>/Usi',
250
+						$htmlArray[$x]
251
+					) || preg_match(
252
+						'/<' . $esteticBoxLikeElements . '(.*) \/>/Usi',
253
+						$htmlArray[$x]
254
+					) || # one element before is a element that has a line break after
255
+					preg_match('/<\/' . $esteticBoxLikeElements . '(.*)>/Usi', $htmlArray[$x - 1]) || substr(
256
+						$htmlArray[$x - 1],
257
+						0,
258
+						4
259
+					) == '<!--' || preg_match('/<' . $esteticBoxLikeElements . '(.*) \/>/Usi', $htmlArray[$x - 1]))
260
+			) {
261
+				$newline = true;
262
+			} elseif ($this->formatType >= 4 && ( // logical line break
263
+					# this element has a line break before itself
264
+					preg_match(
265
+						'/<' . $allBoxLikeElements . '(.*)>/Usi',
266
+						$htmlArray[$x]
267
+					) || preg_match(
268
+						'/<' . $allBoxLikeElements . '(.*) \/>/Usi',
269
+						$htmlArray[$x]
270
+					) || # one element before is a element that has a line break after
271
+					preg_match('/<\/' . $allBoxLikeElements . '(.*)>/Usi', $htmlArray[$x - 1]) || substr(
272
+						$htmlArray[$x - 1],
273
+						0,
274
+						4
275
+					) == '<!--' || preg_match('/<' . $allBoxLikeElements . '(.*) \/>/Usi', $htmlArray[$x - 1]))
276
+			) {
277
+				$newline = true;
278
+			}
279
+
280
+			// count down a tab
281
+			if (substr($htmlArray[$x], 0, 2) == '</') {
282
+				$tabs--;
283
+			}
284
+
285
+			// add tabs and line breaks in front of the current tag
286
+			if ($newline) {
287
+				$html .= $this->newline;
288
+				for ($y = 0; $y < $tabs; $y++) {
289
+					$html .= $this->tab;
290
+				}
291
+			}
292
+
293
+			// remove white spaces and line breaks and add current tag to the html-string
294
+			if (substr($htmlArray[$x], 0, 9) == '<![CDATA[' // remove multiple white space in CDATA / XML
295
+				|| substr($htmlArray[$x], 0, 5) == '<?xml'
296
+			) {
297
+				$html .= $this->killWhiteSpace($htmlArray[$x]);
298
+			} else { // remove all line breaks
299
+				$html .= $this->killLineBreaks($htmlArray[$x]);
300
+			}
301
+
302
+			// count up a tab
303
+			if (substr($htmlArray[$x], 0, 1) == '<' && substr($htmlArray[$x], 1, 1) != '/') {
304
+				if (substr($htmlArray[$x], 1, 1) !== ' '
305
+					&& substr($htmlArray[$x], 1, 3) !== 'img'
306
+					&& substr($htmlArray[$x], 1, 6) !== 'source'
307
+					&& substr($htmlArray[$x], 1, 2) !== 'br'
308
+					&& substr($htmlArray[$x], 1, 2) !== 'hr'
309
+					&& substr($htmlArray[$x], 1, 5) !== 'input'
310
+					&& substr($htmlArray[$x], 1, 4) !== 'link'
311
+					&& substr($htmlArray[$x], 1, 4) !== 'meta'
312
+					&& substr($htmlArray[$x], 1, 4) !== 'col '
313
+					&& substr($htmlArray[$x], 1, 5) !== 'frame'
314
+					&& substr($htmlArray[$x], 1, 7) !== 'isindex'
315
+					&& substr($htmlArray[$x], 1, 5) !== 'param'
316
+					&& substr($htmlArray[$x], 1, 4) !== 'area'
317
+					&& substr($htmlArray[$x], 1, 4) !== 'base'
318
+					&& substr($htmlArray[$x], 0, 2) !== '<!'
319
+					&& substr($htmlArray[$x], 0, 5) !== '<?xml'
320
+				) {
321
+					$tabs++;
322
+				}
323
+			}
324
+		}
325
+
326
+		// Remove empty lines
327
+		if ($this->formatType > 1) {
328
+			$this->removeEmptyLines($html);
329
+		}
330
+
331
+		// Restore saved comments, styles and java-scripts
332
+		for ($i = 0; $i < count($noFormat); $i++) {
333
+			$html = str_replace("<!-- ELEMENT $i -->", $noFormat[$i], $html);
334
+		}
335
+
336
+		// include debug comment at the end
337
+		if ($tabs != 0 && $this->debugComment === true) {
338
+			$html .= "<!-- $tabs open elements found -->";
339
+		}
340
+
341
+		return $html;
342
+	}
343
+
344
+	/**
345
+	 * Remove ALL line breaks and multiple white space
346
+	 *
347
+	 * @param string $html
348
+	 *
349
+	 * @return string
350
+	 */
351
+	protected function killLineBreaks($html)
352
+	{
353
+		$html = str_replace($this->newline, '', $html);
354
+		$html = preg_replace('/\s\s+/u', ' ', $html);
355
+		return $html;
356
+		#? return preg_replace('/\n|\s+(\s)/u', '$1', $html);
357
+	}
358
+
359
+	/**
360
+	 * Remove multiple white space, keeps line breaks
361
+	 *
362
+	 * @param string $html
363
+	 *
364
+	 * @return string
365
+	 */
366
+	protected function killWhiteSpace($html)
367
+	{
368
+		$temp = explode($this->newline, $html);
369
+		for ($i = 0; $i < count($temp); $i++) {
370
+			if (!trim($temp[$i])) {
371
+				unset($temp[$i]);
372
+				continue;
373
+			}
374
+
375
+			$temp[$i] = trim($temp[$i]);
376
+			$temp[$i] = preg_replace('/\s\s+/', ' ', $temp[$i]);
377
+		}
378
+		$html = implode($this->newline, $temp);
379
+		return $html;
380
+	}
381
+
382
+	/**
383
+	 * Remove white space at the end of lines, keeps other white space and line breaks
384
+	 *
385
+	 * @param string $html
386
+	 *
387
+	 * @return string
388
+	 */
389
+	protected function rTrimLines(&$html)
390
+	{
391
+		$html = preg_replace('/\s+$/m', '', $html);
392
+	}
393
+
394
+	/**
395
+	 * Convert newlines according to the current OS
396
+	 *
397
+	 * @param string $html
398
+	 *
399
+	 * @return string
400
+	 */
401
+	protected function convNlOs(&$html)
402
+	{
403
+		$html = preg_replace("(\r\n|\r)", $this->newline, $html);
404
+	}
405
+
406
+	/**
407
+	 * Remove empty lines
408
+	 *
409
+	 * @param string $html
410
+	 *
411
+	 * @return void
412
+	 */
413
+	protected function removeEmptyLines(&$html)
414
+	{
415
+		$temp = explode($this->newline, $html);
416
+		$result = [];
417
+		for ($i = 0; $i < count($temp); ++$i) {
418
+			if ('' == trim($temp[$i])) {
419
+				continue;
420
+			}
421
+			$result[] = $temp[$i];
422
+		}
423
+		$html = implode($this->newline, $result);
424
+	}
425
+
426
+	/**
427
+	 * Include configured header comment in HTML content block
428
+	 *
429
+	 * @param $html
430
+	 */
431
+	public function includeHeaderComment(&$html)
432
+	{
433
+		$html = preg_replace('/^(-->)$/m', "\n\t" . $this->headerComment . "\n$1", $html);
434
+	}
435 435
 }
Please login to merge, or discard this patch.
Spacing   +22 added lines, -22 removed lines patch added patch discarded remove patch
@@ -68,7 +68,7 @@  discard block
 block discarded – undo
68 68
     {
69 69
         if (!empty($config)) {
70 70
             if ($config['formatHtml'] && is_numeric($config['formatHtml'])) {
71
-                $this->formatType = (int)$config['formatHtml'];
71
+                $this->formatType = (int) $config['formatHtml'];
72 72
             }
73 73
 
74 74
             if ($config['formatHtml.']['tabSize'] && is_numeric($config['formatHtml.']['tabSize'])) {
@@ -76,14 +76,14 @@  discard block
 block discarded – undo
76 76
             }
77 77
 
78 78
             if (isset($config['formatHtml.']['debugComment'])) {
79
-                $this->debugComment = (bool)$config['formatHtml.']['debugComment'];
79
+                $this->debugComment = (bool) $config['formatHtml.']['debugComment'];
80 80
             }
81 81
 
82 82
             if (isset($config['headerComment'])) {
83 83
                 $this->headerComment = $config['headerComment'];
84 84
             }
85 85
 
86
-            if (isset($config['dropEmptySpaceChar']) && (bool)$config['dropEmptySpaceChar']) {
86
+            if (isset($config['dropEmptySpaceChar']) && (bool) $config['dropEmptySpaceChar']) {
87 87
                 $this->emptySpaceChar = '';
88 88
             }
89 89
         }
@@ -100,7 +100,7 @@  discard block
 block discarded – undo
100 100
     public function clean($html, $config = [])
101 101
     {
102 102
         if (!empty($config)) {
103
-            if ((bool)$config['enabled'] === false) {
103
+            if ((bool) $config['enabled'] === false) {
104 104
                 return $html;
105 105
             }
106 106
 
@@ -111,11 +111,11 @@  discard block
 block discarded – undo
111 111
 
112 112
         $manipulations = [];
113 113
 
114
-        if (isset($config['removeGenerator']) && (bool)$config['removeGenerator']) {
114
+        if (isset($config['removeGenerator']) && (bool) $config['removeGenerator']) {
115 115
             $manipulations['removeGenerator'] = GeneralUtility::makeInstance(RemoveGenerator::class);
116 116
         }
117 117
 
118
-        if (isset($config['removeComments']) && (bool)$config['removeComments']) {
118
+        if (isset($config['removeComments']) && (bool) $config['removeComments']) {
119 119
             $manipulations['removeComments'] = GeneralUtility::makeInstance(RemoveComments::class);
120 120
         }
121 121
 
@@ -125,7 +125,7 @@  discard block
 block discarded – undo
125 125
 
126 126
         foreach ($manipulations as $key => $manipulation) {
127 127
             /** @var ManipulationInterface $manipulation */
128
-            $configuration = isset($config[$key . '.']) && is_array($config[$key . '.']) ? $config[$key . '.'] : [];
128
+            $configuration = isset($config[$key.'.']) && is_array($config[$key.'.']) ? $config[$key.'.'] : [];
129 129
             $html = $manipulation->manipulate($html, $configuration);
130 130
         }
131 131
 
@@ -188,7 +188,7 @@  discard block
 block discarded – undo
188 188
         $functionalBoxElements = 'dd|dt|frameset|li|tbody|td|tfoot|th|thead|tr|colgroup';
189 189
         $usableBoxElements = 'applet|button|del|iframe|ins|map|object|script';
190 190
         $imagineBoxElements = 'html|body|head|meta|title|link|script|base|!--';
191
-        $allBoxLikeElements = '(?>' . $trueBoxElements . '|' . $functionalBoxElements . '|' . $usableBoxElements . '|' . $imagineBoxElements . ')';
191
+        $allBoxLikeElements = '(?>'.$trueBoxElements.'|'.$functionalBoxElements.'|'.$usableBoxElements.'|'.$imagineBoxElements.')';
192 192
         $esteticBoxLikeElements = '(?>html|head|body|meta name|title|div|table|h1|h2|h3|h4|h5|h6|p|form|pre|center|!--)';
193 193
         $structureBoxLikeElements = '(?>html|head|body|div|!--)';
194 194
 
@@ -197,7 +197,7 @@  discard block
 block discarded – undo
197 197
             '/(<(?:[^<>]+(?:"[^"]*"|\'[^\']*\')?)+>)/',
198 198
             $html,
199 199
             -1,
200
-            PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY
200
+            PREG_SPLIT_DELIM_CAPTURE|PREG_SPLIT_NO_EMPTY
201 201
         );
202 202
 
203 203
         if ($htmlArrayTemp === false) {
@@ -227,52 +227,52 @@  discard block
 block discarded – undo
227 227
             } elseif ($this->formatType == 2 && ( // minimalistic line break
228 228
                     # this element has a line break before itself
229 229
                     preg_match(
230
-                        '/<' . $structureBoxLikeElements . '(.*)>/Usi',
230
+                        '/<'.$structureBoxLikeElements.'(.*)>/Usi',
231 231
                         $htmlArray[$x]
232 232
                     ) || preg_match(
233
-                        '/<' . $structureBoxLikeElements . '(.*) \/>/Usi',
233
+                        '/<'.$structureBoxLikeElements.'(.*) \/>/Usi',
234 234
                         $htmlArray[$x]
235 235
                     ) || # one element before is a element that has a line break after
236 236
                     preg_match(
237
-                        '/<\/' . $structureBoxLikeElements . '(.*)>/Usi',
237
+                        '/<\/'.$structureBoxLikeElements.'(.*)>/Usi',
238 238
                         $htmlArray[$x - 1]
239 239
                     ) || substr(
240 240
                         $htmlArray[$x - 1],
241 241
                         0,
242 242
                         4
243
-                    ) == '<!--' || preg_match('/<' . $structureBoxLikeElements . '(.*) \/>/Usi', $htmlArray[$x - 1]))
243
+                    ) == '<!--' || preg_match('/<'.$structureBoxLikeElements.'(.*) \/>/Usi', $htmlArray[$x - 1]))
244 244
             ) {
245 245
                 $newline = true;
246 246
             } elseif ($this->formatType == 3 && ( // aestetic line break
247 247
                     # this element has a line break before itself
248 248
                     preg_match(
249
-                        '/<' . $esteticBoxLikeElements . '(.*)>/Usi',
249
+                        '/<'.$esteticBoxLikeElements.'(.*)>/Usi',
250 250
                         $htmlArray[$x]
251 251
                     ) || preg_match(
252
-                        '/<' . $esteticBoxLikeElements . '(.*) \/>/Usi',
252
+                        '/<'.$esteticBoxLikeElements.'(.*) \/>/Usi',
253 253
                         $htmlArray[$x]
254 254
                     ) || # one element before is a element that has a line break after
255
-                    preg_match('/<\/' . $esteticBoxLikeElements . '(.*)>/Usi', $htmlArray[$x - 1]) || substr(
255
+                    preg_match('/<\/'.$esteticBoxLikeElements.'(.*)>/Usi', $htmlArray[$x - 1]) || substr(
256 256
                         $htmlArray[$x - 1],
257 257
                         0,
258 258
                         4
259
-                    ) == '<!--' || preg_match('/<' . $esteticBoxLikeElements . '(.*) \/>/Usi', $htmlArray[$x - 1]))
259
+                    ) == '<!--' || preg_match('/<'.$esteticBoxLikeElements.'(.*) \/>/Usi', $htmlArray[$x - 1]))
260 260
             ) {
261 261
                 $newline = true;
262 262
             } elseif ($this->formatType >= 4 && ( // logical line break
263 263
                     # this element has a line break before itself
264 264
                     preg_match(
265
-                        '/<' . $allBoxLikeElements . '(.*)>/Usi',
265
+                        '/<'.$allBoxLikeElements.'(.*)>/Usi',
266 266
                         $htmlArray[$x]
267 267
                     ) || preg_match(
268
-                        '/<' . $allBoxLikeElements . '(.*) \/>/Usi',
268
+                        '/<'.$allBoxLikeElements.'(.*) \/>/Usi',
269 269
                         $htmlArray[$x]
270 270
                     ) || # one element before is a element that has a line break after
271
-                    preg_match('/<\/' . $allBoxLikeElements . '(.*)>/Usi', $htmlArray[$x - 1]) || substr(
271
+                    preg_match('/<\/'.$allBoxLikeElements.'(.*)>/Usi', $htmlArray[$x - 1]) || substr(
272 272
                         $htmlArray[$x - 1],
273 273
                         0,
274 274
                         4
275
-                    ) == '<!--' || preg_match('/<' . $allBoxLikeElements . '(.*) \/>/Usi', $htmlArray[$x - 1]))
275
+                    ) == '<!--' || preg_match('/<'.$allBoxLikeElements.'(.*) \/>/Usi', $htmlArray[$x - 1]))
276 276
             ) {
277 277
                 $newline = true;
278 278
             }
@@ -430,6 +430,6 @@  discard block
 block discarded – undo
430 430
      */
431 431
     public function includeHeaderComment(&$html)
432 432
     {
433
-        $html = preg_replace('/^(-->)$/m', "\n\t" . $this->headerComment . "\n$1", $html);
433
+        $html = preg_replace('/^(-->)$/m', "\n\t".$this->headerComment."\n$1", $html);
434 434
     }
435 435
 }
Please login to merge, or discard this patch.