Completed
Push — master ( 685f09...61fbf9 )
by Tim
18s queued 12s
created
Classes/Service/CleanHtmlService.php 2 patches
Indentation   +416 added lines, -416 removed lines patch added patch discarded remove patch
@@ -16,420 +16,420 @@
 block discarded – undo
16 16
 class CleanHtmlService implements SingletonInterface
17 17
 {
18 18
 
19
-    /**
20
-     * Enable Debug comment in footer
21
-     *
22
-     * @var boolean
23
-     */
24
-    protected $debugComment = false;
25
-
26
-    /**
27
-     * Format Type
28
-     *
29
-     * @var integer
30
-     */
31
-    protected $formatType = 0;
32
-
33
-    /**
34
-     * Tab character
35
-     *
36
-     * @var string
37
-     */
38
-    protected $tab = "\t";
39
-
40
-    /**
41
-     * Newline character
42
-     *
43
-     * @var string
44
-     */
45
-    protected $newline = "\n";
46
-
47
-    /**
48
-     * Configured extra header comment
49
-     *
50
-     * @var string
51
-     */
52
-    protected $headerComment = '';
53
-
54
-    /**
55
-     * Empty space char
56
-     * @var string
57
-     */
58
-    protected $emptySpaceChar = ' ';
59
-
60
-    /**
61
-     * Set variables based on given config
62
-     *
63
-     * @param array $config
64
-     *
65
-     * @return void
66
-     */
67
-    public function setVariables(array $config)
68
-    {
69
-        if (!empty($config)) {
70
-            if ($config['formatHtml'] && is_numeric($config['formatHtml'])) {
71
-                $this->formatType = (int)$config['formatHtml'];
72
-            }
73
-
74
-            if ($config['formatHtml.']['tabSize'] && is_numeric($config['formatHtml.']['tabSize'])) {
75
-                $this->tab = str_pad('', $config['formatHtml.']['tabSize'], ' ');
76
-            }
77
-
78
-            if (isset($config['formatHtml.']['debugComment'])) {
79
-                $this->debugComment = (bool)$config['formatHtml.']['debugComment'];
80
-            }
81
-
82
-            if (isset($config['headerComment'])) {
83
-                $this->headerComment = $config['headerComment'];
84
-            }
85
-
86
-            if (isset($config['dropEmptySpaceChar']) && (bool)$config['dropEmptySpaceChar']) {
87
-                $this->emptySpaceChar = '';
88
-            }
89
-        }
90
-    }
91
-
92
-    /**
93
-     * Clean given HTML with formatter
94
-     *
95
-     * @param string $html
96
-     * @param array $config
97
-     *
98
-     * @return string
99
-     */
100
-    public function clean($html, $config = [])
101
-    {
102
-        if (!empty($config)) {
103
-            if ((bool)$config['enabled'] === false) {
104
-                return $html;
105
-            }
106
-
107
-            $this->setVariables($config);
108
-        }
109
-        // convert line-breaks to UNIX
110
-        $this->convNlOs($html);
111
-
112
-        $manipulations = [];
113
-
114
-        if (isset($config['removeGenerator']) && (bool)$config['removeGenerator']) {
115
-            $manipulations['removeGenerator'] = GeneralUtility::makeInstance(RemoveGenerator::class);
116
-        }
117
-
118
-        if (isset($config['removeComments']) && (bool)$config['removeComments']) {
119
-            $manipulations['removeComments'] = GeneralUtility::makeInstance(RemoveComments::class);
120
-        }
121
-
122
-        if (!empty($this->headerComment)) {
123
-            $this->includeHeaderComment($html);
124
-        }
125
-
126
-        foreach ($manipulations as $key => $manipulation) {
127
-            /** @var ManipulationInterface $manipulation */
128
-            $configuration = isset($config[$key . '.']) && is_array($config[$key . '.']) ? $config[$key . '.'] : [];
129
-            $html = $manipulation->manipulate($html, $configuration);
130
-        }
131
-
132
-        // cleanup HTML5 self-closing elements
133
-        if(!isset($GLOBALS['TSFE']->config['config']['doctype']) || 'x' !== substr($GLOBALS['TSFE']->config['config']['doctype'],0,1)) {
134
-            $html = preg_replace('/<((?:area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)\s[^>]+?)\s?\/>/', '<$1>', $html);
135
-        }
136
-
137
-        if ($this->formatType > 0) {
138
-            $html = $this->formatHtml($html);
139
-        }
140
-        // remove white space after line ending
141
-        $this->rTrimLines($html);
142
-
143
-        // recover line-breaks
144
-        if (Environment::isWindows()) {
145
-          $html = str_replace($this->newline, "\r\n", $html);
146
-        }
147
-
148
-        return $html;
149
-    }
150
-
151
-    /**
152
-     * Formats the (X)HTML code:
153
-     *  - taps according to the hirarchy of the tags
154
-     *  - removes empty spaces between tags
155
-     *  - removes linebreaks within tags (spares where necessary: pre, textarea, comments, ..)
156
-     *  choose from five options:
157
-     *    0 => off
158
-     *    1 => no line break at all  (code in one line)
159
-     *    2 => minimalistic line breaks (structure defining box-elements)
160
-     *    3 => aesthetic line breaks (important box-elements)
161
-     *    4 => logic line breaks (all box-elements)
162
-     *    5 => max line breaks (all elements)
163
-     *
164
-     * @param string $html
165
-     *
166
-     * @return string
167
-     */
168
-    protected function formatHtml($html)
169
-    {
170
-        // Save original formated comments, pre, textarea, styles and java-scripts & replace them with markers
171
-        preg_match_all(
172
-            '/(?s)((<!--.*?-->)|(<[ \n\r]*pre[^>]*>.*?<[ \n\r]*\/pre[^>]*>)|(<[ \n\r]*textarea[^>]*>.*?<[ \n\r]*\/textarea[^>]*>)|(<[ \n\r]*style[^>]*>.*?<[ \n\r]*\/style[^>]*>)|(<[ \n\r]*script[^>]*>.*?<[ \n\r]*\/script[^>]*>))/im',
173
-            $html,
174
-            $matches
175
-        );
176
-        $noFormat = $matches[0]; // do not format these block elements
177
-        for ($i = 0; $i < count($noFormat); $i++) {
178
-            $html = str_replace($noFormat[$i], "\n<!-- ELEMENT $i -->", $html);
179
-        }
180
-
181
-        // define box elements for formatting
182
-        $trueBoxElements = 'address|blockquote|center|dir|div|dl|fieldset|form|h1|h2|h3|h4|h5|h6|hr|isindex|menu|noframes|noscript|ol|p|pre|table|ul|article|aside|details|figcaption|figure|footer|header|hgroup|menu|nav|section';
183
-        $functionalBoxElements = 'dd|dt|frameset|li|tbody|td|tfoot|th|thead|tr|colgroup';
184
-        $usableBoxElements = 'applet|button|del|iframe|ins|map|object|script';
185
-        $imagineBoxElements = 'html|body|head|meta|title|link|script|base|!--';
186
-        $allBoxLikeElements = '(?>' . $trueBoxElements . '|' . $functionalBoxElements . '|' . $usableBoxElements . '|' . $imagineBoxElements . ')';
187
-        $esteticBoxLikeElements = '(?>html|head|body|meta name|title|div|table|h1|h2|h3|h4|h5|h6|p|form|pre|center|!--)';
188
-        $structureBoxLikeElements = '(?>html|head|body|div|!--)';
189
-
190
-        // split html into it's elements
191
-        $htmlArrayTemp = preg_split(
192
-            '/(<(?:[^<>]+(?:"[^"]*"|\'[^\']*\')?)+>)/',
193
-            $html,
194
-            -1,
195
-            PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY
196
-        );
197
-
198
-        if ($htmlArrayTemp === false) {
199
-            // Restore saved comments, styles and java-scripts
200
-            for ($i = 0; $i < count($noFormat); $i++) {
201
-                $html = str_replace("<!-- ELEMENT $i -->", $noFormat[$i], $html);
202
-            }
203
-            return $html;
204
-        }
205
-        // remove empty lines
206
-        $htmlArray = [''];
207
-        $z = 1;
208
-        for ($x = 0; $x < count($htmlArrayTemp); $x++) {
209
-            $t = trim($htmlArrayTemp[$x]);
210
-            if ($t !== '') {
211
-                $htmlArray[$z] = $htmlArrayTemp[$x];
212
-                $z++;
213
-            } else {
214
-                $htmlArray[$z] = $this->emptySpaceChar;
215
-                $z++;
216
-            }
217
-        }
218
-
219
-        // rebuild html
220
-        $html = '';
221
-        $tabs = 0;
222
-        for ($x = 0; $x < count($htmlArray); $x++) {
223
-            // check if the element should stand in a new line
224
-            $newline = false;
225
-            if (substr($htmlArray[$x - 1], 0, 5) == '<?xml') {
226
-                $newline = true;
227
-            } elseif ($this->formatType == 2 && ( // minimalistic line break
228
-                    # this element has a line break before itself
229
-                    preg_match(
230
-                        '/<' . $structureBoxLikeElements . '(.*)>/Usi',
231
-                        $htmlArray[$x]
232
-                    ) || preg_match(
233
-                        '/<' . $structureBoxLikeElements . '(.*) \/>/Usi',
234
-                        $htmlArray[$x]
235
-                    ) || # one element before is a element that has a line break after
236
-                    preg_match(
237
-                        '/<\/' . $structureBoxLikeElements . '(.*)>/Usi',
238
-                        $htmlArray[$x - 1]
239
-                    ) || substr(
240
-                        $htmlArray[$x - 1],
241
-                        0,
242
-                        4
243
-                    ) == '<!--' || preg_match('/<' . $structureBoxLikeElements . '(.*) \/>/Usi', $htmlArray[$x - 1]))
244
-            ) {
245
-                $newline = true;
246
-            } elseif ($this->formatType == 3 && ( // aestetic line break
247
-                    # this element has a line break before itself
248
-                    preg_match(
249
-                        '/<' . $esteticBoxLikeElements . '(.*)>/Usi',
250
-                        $htmlArray[$x]
251
-                    ) || preg_match(
252
-                        '/<' . $esteticBoxLikeElements . '(.*) \/>/Usi',
253
-                        $htmlArray[$x]
254
-                    ) || # one element before is a element that has a line break after
255
-                    preg_match('/<\/' . $esteticBoxLikeElements . '(.*)>/Usi', $htmlArray[$x - 1]) || substr(
256
-                        $htmlArray[$x - 1],
257
-                        0,
258
-                        4
259
-                    ) == '<!--' || preg_match('/<' . $esteticBoxLikeElements . '(.*) \/>/Usi', $htmlArray[$x - 1]))
260
-            ) {
261
-                $newline = true;
262
-            } elseif ($this->formatType >= 4 && ( // logical line break
263
-                    # this element has a line break before itself
264
-                    preg_match(
265
-                        '/<' . $allBoxLikeElements . '(.*)>/Usi',
266
-                        $htmlArray[$x]
267
-                    ) || preg_match(
268
-                        '/<' . $allBoxLikeElements . '(.*) \/>/Usi',
269
-                        $htmlArray[$x]
270
-                    ) || # one element before is a element that has a line break after
271
-                    preg_match('/<\/' . $allBoxLikeElements . '(.*)>/Usi', $htmlArray[$x - 1]) || substr(
272
-                        $htmlArray[$x - 1],
273
-                        0,
274
-                        4
275
-                    ) == '<!--' || preg_match('/<' . $allBoxLikeElements . '(.*) \/>/Usi', $htmlArray[$x - 1]))
276
-            ) {
277
-                $newline = true;
278
-            }
279
-
280
-            // count down a tab
281
-            if (substr($htmlArray[$x], 0, 2) == '</') {
282
-                $tabs--;
283
-            }
284
-
285
-            // add tabs and line breaks in front of the current tag
286
-            if ($newline) {
287
-                $html .= $this->newline;
288
-                for ($y = 0; $y < $tabs; $y++) {
289
-                    $html .= $this->tab;
290
-                }
291
-            }
292
-
293
-            // remove white spaces and line breaks and add current tag to the html-string
294
-            if (substr($htmlArray[$x], 0, 9) == '<![CDATA[' // remove multiple white space in CDATA / XML
295
-                || substr($htmlArray[$x], 0, 5) == '<?xml'
296
-            ) {
297
-                $html .= $this->killWhiteSpace($htmlArray[$x]);
298
-            } else { // remove all line breaks
299
-                $html .= $this->killLineBreaks($htmlArray[$x]);
300
-            }
301
-
302
-            // count up a tab
303
-            if (substr($htmlArray[$x], 0, 1) == '<' && substr($htmlArray[$x], 1, 1) != '/') {
304
-                if (
305
-                    substr($htmlArray[$x], 1, 1) !== ' '
306
-                    && substr($htmlArray[$x], 1, 3) !== 'img'
307
-                    && substr($htmlArray[$x], 1, 6) !== 'source'
308
-                    && substr($htmlArray[$x], 1, 2) !== 'br'
309
-                    && substr($htmlArray[$x], 1, 2) !== 'hr'
310
-                    && substr($htmlArray[$x], 1, 5) !== 'input'
311
-                    && substr($htmlArray[$x], 1, 4) !== 'link'
312
-                    && substr($htmlArray[$x], 1, 4) !== 'meta'
313
-                    && substr($htmlArray[$x], 1, 4) !== 'col '
314
-                    && substr($htmlArray[$x], 1, 5) !== 'frame'
315
-                    && substr($htmlArray[$x], 1, 7) !== 'isindex'
316
-                    && substr($htmlArray[$x], 1, 5) !== 'param'
317
-                    && substr($htmlArray[$x], 1, 4) !== 'area'
318
-                    && substr($htmlArray[$x], 1, 4) !== 'base'
319
-                    && substr($htmlArray[$x], 0, 2) !== '<!'
320
-                    && substr($htmlArray[$x], 0, 5) !== '<?xml'
321
-                ) {
322
-                    $tabs++;
323
-                }
324
-            }
325
-        }
326
-
327
-        // Remove empty lines
328
-        if ($this->formatType > 1) {
329
-            $this->removeEmptyLines($html);
330
-        }
331
-
332
-        // Restore saved comments, styles and java-scripts
333
-        for ($i = 0; $i < count($noFormat); $i++) {
334
-            $html = str_replace("<!-- ELEMENT $i -->", $noFormat[$i], $html);
335
-        }
336
-
337
-        // include debug comment at the end
338
-        if ($tabs != 0 && $this->debugComment === true) {
339
-            $html .= "<!-- $tabs open elements found -->";
340
-        }
341
-
342
-        return $html;
343
-    }
344
-
345
-    /**
346
-     * Remove ALL line breaks and multiple white space
347
-     *
348
-     * @param string $html
349
-     *
350
-     * @return string
351
-     */
352
-    protected function killLineBreaks($html)
353
-    {
354
-        $html = str_replace($this->newline, '', $html);
355
-        $html = preg_replace('/\s\s+/u', ' ', $html);
356
-        return $html;
357
-        #? return preg_replace('/\n|\s+(\s)/u', '$1', $html);
358
-    }
359
-
360
-    /**
361
-     * Remove multiple white space, keeps line breaks
362
-     *
363
-     * @param string $html
364
-     *
365
-     * @return string
366
-     */
367
-    protected function killWhiteSpace($html)
368
-    {
369
-        $temp = explode($this->newline, $html);
370
-        for ($i = 0; $i < count($temp); $i++) {
371
-            if (!trim($temp[$i])) {
372
-                unset($temp[$i]);
373
-            } else {
374
-                $temp[$i] = trim($temp[$i]);
375
-                $temp[$i] = preg_replace('/\s\s+/', ' ', $temp[$i]);
376
-            }
377
-        }
378
-        $html = implode($this->newline, $temp);
379
-        return $html;
380
-    }
381
-
382
-    /**
383
-     * Remove white space at the end of lines, keeps other white space and line breaks
384
-     *
385
-     * @param string $html
386
-     *
387
-     * @return string
388
-     */
389
-    protected function rTrimLines(&$html)
390
-    {
391
-        $html = preg_replace('/\s+$/m', '', $html);
392
-    }
393
-
394
-    /**
395
-     * Convert newlines according to the current OS
396
-     *
397
-     * @param string $html
398
-     *
399
-     * @return string
400
-     */
401
-    protected function convNlOs(&$html)
402
-    {
403
-        $html = preg_replace("(\r\n|\r)", $this->newline, $html);
404
-    }
405
-
406
-    /**
407
-     * Remove empty lines
408
-     *
409
-     * @param string $html
410
-     *
411
-     * @return void
412
-     */
413
-    protected function removeEmptyLines(&$html)
414
-    {
415
-        $temp = explode($this->newline, $html);
416
-        $result = [];
417
-        for ($i = 0; $i < count($temp); ++$i) {
418
-            if ('' == trim($temp[$i])) {
419
-                continue;
420
-            }
421
-            $result[] = $temp[$i];
422
-        }
423
-        $html = implode($this->newline, $result);
424
-    }
425
-
426
-    /**
427
-     * Include configured header comment in HTML content block
428
-     *
429
-     * @param $html
430
-     */
431
-    public function includeHeaderComment(&$html)
432
-    {
433
-        $html = preg_replace('/^(-->)$/m', "\n\t" . $this->headerComment . "\n$1", $html);
434
-    }
19
+	/**
20
+	 * Enable Debug comment in footer
21
+	 *
22
+	 * @var boolean
23
+	 */
24
+	protected $debugComment = false;
25
+
26
+	/**
27
+	 * Format Type
28
+	 *
29
+	 * @var integer
30
+	 */
31
+	protected $formatType = 0;
32
+
33
+	/**
34
+	 * Tab character
35
+	 *
36
+	 * @var string
37
+	 */
38
+	protected $tab = "\t";
39
+
40
+	/**
41
+	 * Newline character
42
+	 *
43
+	 * @var string
44
+	 */
45
+	protected $newline = "\n";
46
+
47
+	/**
48
+	 * Configured extra header comment
49
+	 *
50
+	 * @var string
51
+	 */
52
+	protected $headerComment = '';
53
+
54
+	/**
55
+	 * Empty space char
56
+	 * @var string
57
+	 */
58
+	protected $emptySpaceChar = ' ';
59
+
60
+	/**
61
+	 * Set variables based on given config
62
+	 *
63
+	 * @param array $config
64
+	 *
65
+	 * @return void
66
+	 */
67
+	public function setVariables(array $config)
68
+	{
69
+		if (!empty($config)) {
70
+			if ($config['formatHtml'] && is_numeric($config['formatHtml'])) {
71
+				$this->formatType = (int)$config['formatHtml'];
72
+			}
73
+
74
+			if ($config['formatHtml.']['tabSize'] && is_numeric($config['formatHtml.']['tabSize'])) {
75
+				$this->tab = str_pad('', $config['formatHtml.']['tabSize'], ' ');
76
+			}
77
+
78
+			if (isset($config['formatHtml.']['debugComment'])) {
79
+				$this->debugComment = (bool)$config['formatHtml.']['debugComment'];
80
+			}
81
+
82
+			if (isset($config['headerComment'])) {
83
+				$this->headerComment = $config['headerComment'];
84
+			}
85
+
86
+			if (isset($config['dropEmptySpaceChar']) && (bool)$config['dropEmptySpaceChar']) {
87
+				$this->emptySpaceChar = '';
88
+			}
89
+		}
90
+	}
91
+
92
+	/**
93
+	 * Clean given HTML with formatter
94
+	 *
95
+	 * @param string $html
96
+	 * @param array $config
97
+	 *
98
+	 * @return string
99
+	 */
100
+	public function clean($html, $config = [])
101
+	{
102
+		if (!empty($config)) {
103
+			if ((bool)$config['enabled'] === false) {
104
+				return $html;
105
+			}
106
+
107
+			$this->setVariables($config);
108
+		}
109
+		// convert line-breaks to UNIX
110
+		$this->convNlOs($html);
111
+
112
+		$manipulations = [];
113
+
114
+		if (isset($config['removeGenerator']) && (bool)$config['removeGenerator']) {
115
+			$manipulations['removeGenerator'] = GeneralUtility::makeInstance(RemoveGenerator::class);
116
+		}
117
+
118
+		if (isset($config['removeComments']) && (bool)$config['removeComments']) {
119
+			$manipulations['removeComments'] = GeneralUtility::makeInstance(RemoveComments::class);
120
+		}
121
+
122
+		if (!empty($this->headerComment)) {
123
+			$this->includeHeaderComment($html);
124
+		}
125
+
126
+		foreach ($manipulations as $key => $manipulation) {
127
+			/** @var ManipulationInterface $manipulation */
128
+			$configuration = isset($config[$key . '.']) && is_array($config[$key . '.']) ? $config[$key . '.'] : [];
129
+			$html = $manipulation->manipulate($html, $configuration);
130
+		}
131
+
132
+		// cleanup HTML5 self-closing elements
133
+		if(!isset($GLOBALS['TSFE']->config['config']['doctype']) || 'x' !== substr($GLOBALS['TSFE']->config['config']['doctype'],0,1)) {
134
+			$html = preg_replace('/<((?:area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)\s[^>]+?)\s?\/>/', '<$1>', $html);
135
+		}
136
+
137
+		if ($this->formatType > 0) {
138
+			$html = $this->formatHtml($html);
139
+		}
140
+		// remove white space after line ending
141
+		$this->rTrimLines($html);
142
+
143
+		// recover line-breaks
144
+		if (Environment::isWindows()) {
145
+		  $html = str_replace($this->newline, "\r\n", $html);
146
+		}
147
+
148
+		return $html;
149
+	}
150
+
151
+	/**
152
+	 * Formats the (X)HTML code:
153
+	 *  - taps according to the hirarchy of the tags
154
+	 *  - removes empty spaces between tags
155
+	 *  - removes linebreaks within tags (spares where necessary: pre, textarea, comments, ..)
156
+	 *  choose from five options:
157
+	 *    0 => off
158
+	 *    1 => no line break at all  (code in one line)
159
+	 *    2 => minimalistic line breaks (structure defining box-elements)
160
+	 *    3 => aesthetic line breaks (important box-elements)
161
+	 *    4 => logic line breaks (all box-elements)
162
+	 *    5 => max line breaks (all elements)
163
+	 *
164
+	 * @param string $html
165
+	 *
166
+	 * @return string
167
+	 */
168
+	protected function formatHtml($html)
169
+	{
170
+		// Save original formated comments, pre, textarea, styles and java-scripts & replace them with markers
171
+		preg_match_all(
172
+			'/(?s)((<!--.*?-->)|(<[ \n\r]*pre[^>]*>.*?<[ \n\r]*\/pre[^>]*>)|(<[ \n\r]*textarea[^>]*>.*?<[ \n\r]*\/textarea[^>]*>)|(<[ \n\r]*style[^>]*>.*?<[ \n\r]*\/style[^>]*>)|(<[ \n\r]*script[^>]*>.*?<[ \n\r]*\/script[^>]*>))/im',
173
+			$html,
174
+			$matches
175
+		);
176
+		$noFormat = $matches[0]; // do not format these block elements
177
+		for ($i = 0; $i < count($noFormat); $i++) {
178
+			$html = str_replace($noFormat[$i], "\n<!-- ELEMENT $i -->", $html);
179
+		}
180
+
181
+		// define box elements for formatting
182
+		$trueBoxElements = 'address|blockquote|center|dir|div|dl|fieldset|form|h1|h2|h3|h4|h5|h6|hr|isindex|menu|noframes|noscript|ol|p|pre|table|ul|article|aside|details|figcaption|figure|footer|header|hgroup|menu|nav|section';
183
+		$functionalBoxElements = 'dd|dt|frameset|li|tbody|td|tfoot|th|thead|tr|colgroup';
184
+		$usableBoxElements = 'applet|button|del|iframe|ins|map|object|script';
185
+		$imagineBoxElements = 'html|body|head|meta|title|link|script|base|!--';
186
+		$allBoxLikeElements = '(?>' . $trueBoxElements . '|' . $functionalBoxElements . '|' . $usableBoxElements . '|' . $imagineBoxElements . ')';
187
+		$esteticBoxLikeElements = '(?>html|head|body|meta name|title|div|table|h1|h2|h3|h4|h5|h6|p|form|pre|center|!--)';
188
+		$structureBoxLikeElements = '(?>html|head|body|div|!--)';
189
+
190
+		// split html into it's elements
191
+		$htmlArrayTemp = preg_split(
192
+			'/(<(?:[^<>]+(?:"[^"]*"|\'[^\']*\')?)+>)/',
193
+			$html,
194
+			-1,
195
+			PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY
196
+		);
197
+
198
+		if ($htmlArrayTemp === false) {
199
+			// Restore saved comments, styles and java-scripts
200
+			for ($i = 0; $i < count($noFormat); $i++) {
201
+				$html = str_replace("<!-- ELEMENT $i -->", $noFormat[$i], $html);
202
+			}
203
+			return $html;
204
+		}
205
+		// remove empty lines
206
+		$htmlArray = [''];
207
+		$z = 1;
208
+		for ($x = 0; $x < count($htmlArrayTemp); $x++) {
209
+			$t = trim($htmlArrayTemp[$x]);
210
+			if ($t !== '') {
211
+				$htmlArray[$z] = $htmlArrayTemp[$x];
212
+				$z++;
213
+			} else {
214
+				$htmlArray[$z] = $this->emptySpaceChar;
215
+				$z++;
216
+			}
217
+		}
218
+
219
+		// rebuild html
220
+		$html = '';
221
+		$tabs = 0;
222
+		for ($x = 0; $x < count($htmlArray); $x++) {
223
+			// check if the element should stand in a new line
224
+			$newline = false;
225
+			if (substr($htmlArray[$x - 1], 0, 5) == '<?xml') {
226
+				$newline = true;
227
+			} elseif ($this->formatType == 2 && ( // minimalistic line break
228
+					# this element has a line break before itself
229
+					preg_match(
230
+						'/<' . $structureBoxLikeElements . '(.*)>/Usi',
231
+						$htmlArray[$x]
232
+					) || preg_match(
233
+						'/<' . $structureBoxLikeElements . '(.*) \/>/Usi',
234
+						$htmlArray[$x]
235
+					) || # one element before is a element that has a line break after
236
+					preg_match(
237
+						'/<\/' . $structureBoxLikeElements . '(.*)>/Usi',
238
+						$htmlArray[$x - 1]
239
+					) || substr(
240
+						$htmlArray[$x - 1],
241
+						0,
242
+						4
243
+					) == '<!--' || preg_match('/<' . $structureBoxLikeElements . '(.*) \/>/Usi', $htmlArray[$x - 1]))
244
+			) {
245
+				$newline = true;
246
+			} elseif ($this->formatType == 3 && ( // aestetic line break
247
+					# this element has a line break before itself
248
+					preg_match(
249
+						'/<' . $esteticBoxLikeElements . '(.*)>/Usi',
250
+						$htmlArray[$x]
251
+					) || preg_match(
252
+						'/<' . $esteticBoxLikeElements . '(.*) \/>/Usi',
253
+						$htmlArray[$x]
254
+					) || # one element before is a element that has a line break after
255
+					preg_match('/<\/' . $esteticBoxLikeElements . '(.*)>/Usi', $htmlArray[$x - 1]) || substr(
256
+						$htmlArray[$x - 1],
257
+						0,
258
+						4
259
+					) == '<!--' || preg_match('/<' . $esteticBoxLikeElements . '(.*) \/>/Usi', $htmlArray[$x - 1]))
260
+			) {
261
+				$newline = true;
262
+			} elseif ($this->formatType >= 4 && ( // logical line break
263
+					# this element has a line break before itself
264
+					preg_match(
265
+						'/<' . $allBoxLikeElements . '(.*)>/Usi',
266
+						$htmlArray[$x]
267
+					) || preg_match(
268
+						'/<' . $allBoxLikeElements . '(.*) \/>/Usi',
269
+						$htmlArray[$x]
270
+					) || # one element before is a element that has a line break after
271
+					preg_match('/<\/' . $allBoxLikeElements . '(.*)>/Usi', $htmlArray[$x - 1]) || substr(
272
+						$htmlArray[$x - 1],
273
+						0,
274
+						4
275
+					) == '<!--' || preg_match('/<' . $allBoxLikeElements . '(.*) \/>/Usi', $htmlArray[$x - 1]))
276
+			) {
277
+				$newline = true;
278
+			}
279
+
280
+			// count down a tab
281
+			if (substr($htmlArray[$x], 0, 2) == '</') {
282
+				$tabs--;
283
+			}
284
+
285
+			// add tabs and line breaks in front of the current tag
286
+			if ($newline) {
287
+				$html .= $this->newline;
288
+				for ($y = 0; $y < $tabs; $y++) {
289
+					$html .= $this->tab;
290
+				}
291
+			}
292
+
293
+			// remove white spaces and line breaks and add current tag to the html-string
294
+			if (substr($htmlArray[$x], 0, 9) == '<![CDATA[' // remove multiple white space in CDATA / XML
295
+				|| substr($htmlArray[$x], 0, 5) == '<?xml'
296
+			) {
297
+				$html .= $this->killWhiteSpace($htmlArray[$x]);
298
+			} else { // remove all line breaks
299
+				$html .= $this->killLineBreaks($htmlArray[$x]);
300
+			}
301
+
302
+			// count up a tab
303
+			if (substr($htmlArray[$x], 0, 1) == '<' && substr($htmlArray[$x], 1, 1) != '/') {
304
+				if (
305
+					substr($htmlArray[$x], 1, 1) !== ' '
306
+					&& substr($htmlArray[$x], 1, 3) !== 'img'
307
+					&& substr($htmlArray[$x], 1, 6) !== 'source'
308
+					&& substr($htmlArray[$x], 1, 2) !== 'br'
309
+					&& substr($htmlArray[$x], 1, 2) !== 'hr'
310
+					&& substr($htmlArray[$x], 1, 5) !== 'input'
311
+					&& substr($htmlArray[$x], 1, 4) !== 'link'
312
+					&& substr($htmlArray[$x], 1, 4) !== 'meta'
313
+					&& substr($htmlArray[$x], 1, 4) !== 'col '
314
+					&& substr($htmlArray[$x], 1, 5) !== 'frame'
315
+					&& substr($htmlArray[$x], 1, 7) !== 'isindex'
316
+					&& substr($htmlArray[$x], 1, 5) !== 'param'
317
+					&& substr($htmlArray[$x], 1, 4) !== 'area'
318
+					&& substr($htmlArray[$x], 1, 4) !== 'base'
319
+					&& substr($htmlArray[$x], 0, 2) !== '<!'
320
+					&& substr($htmlArray[$x], 0, 5) !== '<?xml'
321
+				) {
322
+					$tabs++;
323
+				}
324
+			}
325
+		}
326
+
327
+		// Remove empty lines
328
+		if ($this->formatType > 1) {
329
+			$this->removeEmptyLines($html);
330
+		}
331
+
332
+		// Restore saved comments, styles and java-scripts
333
+		for ($i = 0; $i < count($noFormat); $i++) {
334
+			$html = str_replace("<!-- ELEMENT $i -->", $noFormat[$i], $html);
335
+		}
336
+
337
+		// include debug comment at the end
338
+		if ($tabs != 0 && $this->debugComment === true) {
339
+			$html .= "<!-- $tabs open elements found -->";
340
+		}
341
+
342
+		return $html;
343
+	}
344
+
345
+	/**
346
+	 * Remove ALL line breaks and multiple white space
347
+	 *
348
+	 * @param string $html
349
+	 *
350
+	 * @return string
351
+	 */
352
+	protected function killLineBreaks($html)
353
+	{
354
+		$html = str_replace($this->newline, '', $html);
355
+		$html = preg_replace('/\s\s+/u', ' ', $html);
356
+		return $html;
357
+		#? return preg_replace('/\n|\s+(\s)/u', '$1', $html);
358
+	}
359
+
360
+	/**
361
+	 * Remove multiple white space, keeps line breaks
362
+	 *
363
+	 * @param string $html
364
+	 *
365
+	 * @return string
366
+	 */
367
+	protected function killWhiteSpace($html)
368
+	{
369
+		$temp = explode($this->newline, $html);
370
+		for ($i = 0; $i < count($temp); $i++) {
371
+			if (!trim($temp[$i])) {
372
+				unset($temp[$i]);
373
+			} else {
374
+				$temp[$i] = trim($temp[$i]);
375
+				$temp[$i] = preg_replace('/\s\s+/', ' ', $temp[$i]);
376
+			}
377
+		}
378
+		$html = implode($this->newline, $temp);
379
+		return $html;
380
+	}
381
+
382
+	/**
383
+	 * Remove white space at the end of lines, keeps other white space and line breaks
384
+	 *
385
+	 * @param string $html
386
+	 *
387
+	 * @return string
388
+	 */
389
+	protected function rTrimLines(&$html)
390
+	{
391
+		$html = preg_replace('/\s+$/m', '', $html);
392
+	}
393
+
394
+	/**
395
+	 * Convert newlines according to the current OS
396
+	 *
397
+	 * @param string $html
398
+	 *
399
+	 * @return string
400
+	 */
401
+	protected function convNlOs(&$html)
402
+	{
403
+		$html = preg_replace("(\r\n|\r)", $this->newline, $html);
404
+	}
405
+
406
+	/**
407
+	 * Remove empty lines
408
+	 *
409
+	 * @param string $html
410
+	 *
411
+	 * @return void
412
+	 */
413
+	protected function removeEmptyLines(&$html)
414
+	{
415
+		$temp = explode($this->newline, $html);
416
+		$result = [];
417
+		for ($i = 0; $i < count($temp); ++$i) {
418
+			if ('' == trim($temp[$i])) {
419
+				continue;
420
+			}
421
+			$result[] = $temp[$i];
422
+		}
423
+		$html = implode($this->newline, $result);
424
+	}
425
+
426
+	/**
427
+	 * Include configured header comment in HTML content block
428
+	 *
429
+	 * @param $html
430
+	 */
431
+	public function includeHeaderComment(&$html)
432
+	{
433
+		$html = preg_replace('/^(-->)$/m', "\n\t" . $this->headerComment . "\n$1", $html);
434
+	}
435 435
 }
Please login to merge, or discard this patch.
Spacing   +23 added lines, -23 removed lines patch added patch discarded remove patch
@@ -68,7 +68,7 @@  discard block
 block discarded – undo
68 68
     {
69 69
         if (!empty($config)) {
70 70
             if ($config['formatHtml'] && is_numeric($config['formatHtml'])) {
71
-                $this->formatType = (int)$config['formatHtml'];
71
+                $this->formatType = (int) $config['formatHtml'];
72 72
             }
73 73
 
74 74
             if ($config['formatHtml.']['tabSize'] && is_numeric($config['formatHtml.']['tabSize'])) {
@@ -76,14 +76,14 @@  discard block
 block discarded – undo
76 76
             }
77 77
 
78 78
             if (isset($config['formatHtml.']['debugComment'])) {
79
-                $this->debugComment = (bool)$config['formatHtml.']['debugComment'];
79
+                $this->debugComment = (bool) $config['formatHtml.']['debugComment'];
80 80
             }
81 81
 
82 82
             if (isset($config['headerComment'])) {
83 83
                 $this->headerComment = $config['headerComment'];
84 84
             }
85 85
 
86
-            if (isset($config['dropEmptySpaceChar']) && (bool)$config['dropEmptySpaceChar']) {
86
+            if (isset($config['dropEmptySpaceChar']) && (bool) $config['dropEmptySpaceChar']) {
87 87
                 $this->emptySpaceChar = '';
88 88
             }
89 89
         }
@@ -100,7 +100,7 @@  discard block
 block discarded – undo
100 100
     public function clean($html, $config = [])
101 101
     {
102 102
         if (!empty($config)) {
103
-            if ((bool)$config['enabled'] === false) {
103
+            if ((bool) $config['enabled'] === false) {
104 104
                 return $html;
105 105
             }
106 106
 
@@ -111,11 +111,11 @@  discard block
 block discarded – undo
111 111
 
112 112
         $manipulations = [];
113 113
 
114
-        if (isset($config['removeGenerator']) && (bool)$config['removeGenerator']) {
114
+        if (isset($config['removeGenerator']) && (bool) $config['removeGenerator']) {
115 115
             $manipulations['removeGenerator'] = GeneralUtility::makeInstance(RemoveGenerator::class);
116 116
         }
117 117
 
118
-        if (isset($config['removeComments']) && (bool)$config['removeComments']) {
118
+        if (isset($config['removeComments']) && (bool) $config['removeComments']) {
119 119
             $manipulations['removeComments'] = GeneralUtility::makeInstance(RemoveComments::class);
120 120
         }
121 121
 
@@ -125,12 +125,12 @@  discard block
 block discarded – undo
125 125
 
126 126
         foreach ($manipulations as $key => $manipulation) {
127 127
             /** @var ManipulationInterface $manipulation */
128
-            $configuration = isset($config[$key . '.']) && is_array($config[$key . '.']) ? $config[$key . '.'] : [];
128
+            $configuration = isset($config[$key.'.']) && is_array($config[$key.'.']) ? $config[$key.'.'] : [];
129 129
             $html = $manipulation->manipulate($html, $configuration);
130 130
         }
131 131
 
132 132
         // cleanup HTML5 self-closing elements
133
-        if(!isset($GLOBALS['TSFE']->config['config']['doctype']) || 'x' !== substr($GLOBALS['TSFE']->config['config']['doctype'],0,1)) {
133
+        if (!isset($GLOBALS['TSFE']->config['config']['doctype']) || 'x' !== substr($GLOBALS['TSFE']->config['config']['doctype'], 0, 1)) {
134 134
             $html = preg_replace('/<((?:area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)\s[^>]+?)\s?\/>/', '<$1>', $html);
135 135
         }
136 136
 
@@ -183,7 +183,7 @@  discard block
 block discarded – undo
183 183
         $functionalBoxElements = 'dd|dt|frameset|li|tbody|td|tfoot|th|thead|tr|colgroup';
184 184
         $usableBoxElements = 'applet|button|del|iframe|ins|map|object|script';
185 185
         $imagineBoxElements = 'html|body|head|meta|title|link|script|base|!--';
186
-        $allBoxLikeElements = '(?>' . $trueBoxElements . '|' . $functionalBoxElements . '|' . $usableBoxElements . '|' . $imagineBoxElements . ')';
186
+        $allBoxLikeElements = '(?>'.$trueBoxElements.'|'.$functionalBoxElements.'|'.$usableBoxElements.'|'.$imagineBoxElements.')';
187 187
         $esteticBoxLikeElements = '(?>html|head|body|meta name|title|div|table|h1|h2|h3|h4|h5|h6|p|form|pre|center|!--)';
188 188
         $structureBoxLikeElements = '(?>html|head|body|div|!--)';
189 189
 
@@ -192,7 +192,7 @@  discard block
 block discarded – undo
192 192
             '/(<(?:[^<>]+(?:"[^"]*"|\'[^\']*\')?)+>)/',
193 193
             $html,
194 194
             -1,
195
-            PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY
195
+            PREG_SPLIT_DELIM_CAPTURE|PREG_SPLIT_NO_EMPTY
196 196
         );
197 197
 
198 198
         if ($htmlArrayTemp === false) {
@@ -227,52 +227,52 @@  discard block
 block discarded – undo
227 227
             } elseif ($this->formatType == 2 && ( // minimalistic line break
228 228
                     # this element has a line break before itself
229 229
                     preg_match(
230
-                        '/<' . $structureBoxLikeElements . '(.*)>/Usi',
230
+                        '/<'.$structureBoxLikeElements.'(.*)>/Usi',
231 231
                         $htmlArray[$x]
232 232
                     ) || preg_match(
233
-                        '/<' . $structureBoxLikeElements . '(.*) \/>/Usi',
233
+                        '/<'.$structureBoxLikeElements.'(.*) \/>/Usi',
234 234
                         $htmlArray[$x]
235 235
                     ) || # one element before is a element that has a line break after
236 236
                     preg_match(
237
-                        '/<\/' . $structureBoxLikeElements . '(.*)>/Usi',
237
+                        '/<\/'.$structureBoxLikeElements.'(.*)>/Usi',
238 238
                         $htmlArray[$x - 1]
239 239
                     ) || substr(
240 240
                         $htmlArray[$x - 1],
241 241
                         0,
242 242
                         4
243
-                    ) == '<!--' || preg_match('/<' . $structureBoxLikeElements . '(.*) \/>/Usi', $htmlArray[$x - 1]))
243
+                    ) == '<!--' || preg_match('/<'.$structureBoxLikeElements.'(.*) \/>/Usi', $htmlArray[$x - 1]))
244 244
             ) {
245 245
                 $newline = true;
246 246
             } elseif ($this->formatType == 3 && ( // aestetic line break
247 247
                     # this element has a line break before itself
248 248
                     preg_match(
249
-                        '/<' . $esteticBoxLikeElements . '(.*)>/Usi',
249
+                        '/<'.$esteticBoxLikeElements.'(.*)>/Usi',
250 250
                         $htmlArray[$x]
251 251
                     ) || preg_match(
252
-                        '/<' . $esteticBoxLikeElements . '(.*) \/>/Usi',
252
+                        '/<'.$esteticBoxLikeElements.'(.*) \/>/Usi',
253 253
                         $htmlArray[$x]
254 254
                     ) || # one element before is a element that has a line break after
255
-                    preg_match('/<\/' . $esteticBoxLikeElements . '(.*)>/Usi', $htmlArray[$x - 1]) || substr(
255
+                    preg_match('/<\/'.$esteticBoxLikeElements.'(.*)>/Usi', $htmlArray[$x - 1]) || substr(
256 256
                         $htmlArray[$x - 1],
257 257
                         0,
258 258
                         4
259
-                    ) == '<!--' || preg_match('/<' . $esteticBoxLikeElements . '(.*) \/>/Usi', $htmlArray[$x - 1]))
259
+                    ) == '<!--' || preg_match('/<'.$esteticBoxLikeElements.'(.*) \/>/Usi', $htmlArray[$x - 1]))
260 260
             ) {
261 261
                 $newline = true;
262 262
             } elseif ($this->formatType >= 4 && ( // logical line break
263 263
                     # this element has a line break before itself
264 264
                     preg_match(
265
-                        '/<' . $allBoxLikeElements . '(.*)>/Usi',
265
+                        '/<'.$allBoxLikeElements.'(.*)>/Usi',
266 266
                         $htmlArray[$x]
267 267
                     ) || preg_match(
268
-                        '/<' . $allBoxLikeElements . '(.*) \/>/Usi',
268
+                        '/<'.$allBoxLikeElements.'(.*) \/>/Usi',
269 269
                         $htmlArray[$x]
270 270
                     ) || # one element before is a element that has a line break after
271
-                    preg_match('/<\/' . $allBoxLikeElements . '(.*)>/Usi', $htmlArray[$x - 1]) || substr(
271
+                    preg_match('/<\/'.$allBoxLikeElements.'(.*)>/Usi', $htmlArray[$x - 1]) || substr(
272 272
                         $htmlArray[$x - 1],
273 273
                         0,
274 274
                         4
275
-                    ) == '<!--' || preg_match('/<' . $allBoxLikeElements . '(.*) \/>/Usi', $htmlArray[$x - 1]))
275
+                    ) == '<!--' || preg_match('/<'.$allBoxLikeElements.'(.*) \/>/Usi', $htmlArray[$x - 1]))
276 276
             ) {
277 277
                 $newline = true;
278 278
             }
@@ -430,6 +430,6 @@  discard block
 block discarded – undo
430 430
      */
431 431
     public function includeHeaderComment(&$html)
432 432
     {
433
-        $html = preg_replace('/^(-->)$/m', "\n\t" . $this->headerComment . "\n$1", $html);
433
+        $html = preg_replace('/^(-->)$/m', "\n\t".$this->headerComment."\n$1", $html);
434 434
     }
435 435
 }
Please login to merge, or discard this patch.