Completed
Push — master ( d82100...bf423b )
by Tim
15s queued 13s
created
Classes/Service/CleanHtmlService.php 1 patch
Indentation   +408 added lines, -408 removed lines patch added patch discarded remove patch
@@ -15,412 +15,412 @@
 block discarded – undo
15 15
  */
16 16
 class CleanHtmlService implements SingletonInterface
17 17
 {
18
-    /**
19
-     * Enable Debug comment in footer.
20
-     *
21
-     * @var bool
22
-     */
23
-    protected $debugComment = false;
24
-
25
-    /**
26
-     * Format Type.
27
-     *
28
-     * @var int
29
-     */
30
-    protected $formatType = 0;
31
-
32
-    /**
33
-     * Tab character.
34
-     *
35
-     * @var string
36
-     */
37
-    protected $tab = "\t";
38
-
39
-    /**
40
-     * Newline character.
41
-     *
42
-     * @var string
43
-     */
44
-    protected $newline = "\n";
45
-
46
-    /**
47
-     * Configured extra header comment.
48
-     *
49
-     * @var string
50
-     */
51
-    protected $headerComment = '';
52
-
53
-    /**
54
-     * Empty space char.
55
-     *
56
-     * @var string
57
-     */
58
-    protected $emptySpaceChar = ' ';
59
-
60
-    /**
61
-     * Set variables based on given config.
62
-     */
63
-    public function setVariables(array $config): void
64
-    {
65
-        if (!empty($config)) {
66
-            if ($config['formatHtml'] && is_numeric($config['formatHtml'])) {
67
-                $this->formatType = (int) $config['formatHtml'];
68
-            }
69
-
70
-            if ($config['formatHtml.']['tabSize'] && is_numeric($config['formatHtml.']['tabSize'])) {
71
-                $this->tab = str_pad('', (int) $config['formatHtml.']['tabSize'], ' ');
72
-            }
73
-
74
-            if (isset($config['formatHtml.']['debugComment'])) {
75
-                $this->debugComment = (bool) $config['formatHtml.']['debugComment'];
76
-            }
77
-
78
-            if (isset($config['headerComment'])) {
79
-                $this->headerComment = $config['headerComment'];
80
-            }
81
-
82
-            if (isset($config['dropEmptySpaceChar']) && (bool) $config['dropEmptySpaceChar']) {
83
-                $this->emptySpaceChar = '';
84
-            }
85
-        }
86
-    }
87
-
88
-    /**
89
-     * Clean given HTML with formatter.
90
-     *
91
-     * @param string $html
92
-     * @param array  $config
93
-     *
94
-     * @return string
95
-     */
96
-    public function clean($html, $config = [])
97
-    {
98
-        if (!empty($config)) {
99
-            $this->setVariables($config);
100
-        }
101
-        // convert line-breaks to UNIX
102
-        $this->convNlOs($html);
103
-
104
-        $manipulations = [];
105
-
106
-        if (isset($config['removeGenerator']) && (bool) $config['removeGenerator']) {
107
-            $manipulations['removeGenerator'] = GeneralUtility::makeInstance(RemoveGenerator::class);
108
-        }
109
-
110
-        if (isset($config['removeComments']) && (bool) $config['removeComments']) {
111
-            $manipulations['removeComments'] = GeneralUtility::makeInstance(RemoveComments::class);
112
-        }
113
-
114
-        if (!empty($this->headerComment)) {
115
-            $this->includeHeaderComment($html);
116
-        }
117
-
118
-        foreach ($manipulations as $key => $manipulation) {
119
-            /** @var ManipulationInterface $manipulation */
120
-            $configuration = isset($config[$key.'.']) && \is_array($config[$key.'.']) ? $config[$key.'.'] : [];
121
-            $html = $manipulation->manipulate($html, $configuration);
122
-        }
123
-
124
-        // cleanup HTML5 self-closing elements
125
-        if (!isset($GLOBALS['TSFE']->config['config']['doctype'])
126
-            || 'x' !== substr($GLOBALS['TSFE']->config['config']['doctype'], 0, 1)) {
127
-            $html = preg_replace(
128
-                '/<((?:area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)\s[^>]+?)\s?\/>/',
129
-                '<$1>',
130
-                $html
131
-            );
132
-        }
133
-
134
-        if ($this->formatType > 0) {
135
-            $html = $this->formatHtml($html);
136
-        }
137
-        // remove white space after line ending
138
-        $this->rTrimLines($html);
139
-
140
-        // recover line-breaks
141
-        if (Environment::isWindows()) {
142
-            $html = str_replace($this->newline, "\r\n", $html);
143
-        }
144
-
145
-        return $html;
146
-    }
147
-
148
-    /**
149
-     * Formats the (X)HTML code:
150
-     *  - taps according to the hirarchy of the tags
151
-     *  - removes empty spaces between tags
152
-     *  - removes linebreaks within tags (spares where necessary: pre, textarea, comments, ..)
153
-     *  choose from five options:
154
-     *    0 => off
155
-     *    1 => no line break at all  (code in one line)
156
-     *    2 => minimalistic line breaks (structure defining box-elements)
157
-     *    3 => aesthetic line breaks (important box-elements)
158
-     *    4 => logic line breaks (all box-elements)
159
-     *    5 => max line breaks (all elements).
160
-     *
161
-     * @param string $html
162
-     *
163
-     * @return string
164
-     */
165
-    protected function formatHtml($html)
166
-    {
167
-        // Save original formated comments, pre, textarea, styles and java-scripts & replace them with markers
168
-        preg_match_all(
169
-            '/(?s)((<!--.*?-->)|(<[ \n\r]*pre[^>]*>.*?<[ \n\r]*\/pre[^>]*>)|(<[ \n\r]*textarea[^>]*>.*?<[ \n\r]*\/textarea[^>]*>)|(<[ \n\r]*style[^>]*>.*?<[ \n\r]*\/style[^>]*>)|(<[ \n\r]*script[^>]*>.*?<[ \n\r]*\/script[^>]*>))/im',
170
-            $html,
171
-            $matches
172
-        );
173
-        $noFormat = $matches[0]; // do not format these block elements
174
-        for ($i = 0; $i < \count($noFormat); ++$i) {
175
-            $html = str_replace($noFormat[$i], "\n<!-- ELEMENT {$i} -->", $html);
176
-        }
177
-
178
-        // define box elements for formatting
179
-        $trueBoxElements = 'address|blockquote|center|dir|div|dl|fieldset|form|h1|h2|h3|h4|h5|h6|hr|isindex|menu|noframes|noscript|ol|p|pre|table|ul|article|aside|details|figcaption|figure|footer|header|hgroup|menu|nav|section';
180
-        $functionalBoxElements = 'dd|dt|frameset|li|tbody|td|tfoot|th|thead|tr|colgroup';
181
-        $usableBoxElements = 'applet|button|del|iframe|ins|map|object|script';
182
-        $imagineBoxElements = 'html|body|head|meta|title|link|script|base|!--';
183
-        $allBoxLikeElements = '(?>'.$trueBoxElements.'|'.$functionalBoxElements.'|'.$usableBoxElements.'|'.$imagineBoxElements.')';
184
-        $esteticBoxLikeElements = '(?>html|head|body|meta name|title|div|table|h1|h2|h3|h4|h5|h6|p|form|pre|center|!--)';
185
-        $structureBoxLikeElements = '(?>html|head|body|div|!--)';
186
-
187
-        // split html into it's elements
188
-        $htmlArrayTemp = preg_split(
189
-            '/(<(?:[^<>]+(?:"[^"]*"|\'[^\']*\')?)+>)/',
190
-            $html,
191
-            -1,
192
-            \PREG_SPLIT_DELIM_CAPTURE | \PREG_SPLIT_NO_EMPTY
193
-        );
194
-
195
-        if (false === $htmlArrayTemp) {
196
-            // Restore saved comments, styles and java-scripts
197
-            for ($i = 0; $i < \count($noFormat); ++$i) {
198
-                $html = str_replace("<!-- ELEMENT {$i} -->", $noFormat[$i], $html);
199
-            }
200
-
201
-            return $html;
202
-        }
203
-        // remove empty lines
204
-        $htmlArray = [''];
205
-        $index = 1;
206
-        for ($x = 0; $x < \count($htmlArrayTemp); ++$x) {
207
-            $text = trim($htmlArrayTemp[$x]);
208
-            $htmlArray[$index] = '' !== $text ? $htmlArrayTemp[$x] : $this->emptySpaceChar;
209
-            ++$index;
210
-        }
211
-
212
-        // rebuild html
213
-        $html = '';
214
-        $tabs = 0;
215
-        for ($x = 0; $x < \count($htmlArray); ++$x) {
216
-            // check if the element should stand in a new line
217
-            $newline = false;
218
-            if ('<?xml' == substr($htmlArray[$x - 1], 0, 5)) {
219
-                $newline = true;
220
-            } elseif (2 == $this->formatType && ( // minimalistic line break
221
-                    // this element has a line break before itself
222
-                    preg_match(
223
-                        '/<'.$structureBoxLikeElements.'(.*)>/Usi',
224
-                        $htmlArray[$x]
225
-                    ) || preg_match(
226
-                        '/<'.$structureBoxLikeElements.'(.*) \/>/Usi',
227
-                        $htmlArray[$x]
228
-                    ) // one element before is a element that has a line break after
229
-                    || preg_match(
230
-                        '/<\/'.$structureBoxLikeElements.'(.*)>/Usi',
231
-                        $htmlArray[$x - 1]
232
-                    ) || '<!--' == substr(
233
-                        $htmlArray[$x - 1],
234
-                        0,
235
-                        4
236
-                    ) || preg_match('/<'.$structureBoxLikeElements.'(.*) \/>/Usi', $htmlArray[$x - 1]))
237
-            ) {
238
-                $newline = true;
239
-            } elseif (3 == $this->formatType && ( // aestetic line break
240
-                    // this element has a line break before itself
241
-                    preg_match(
242
-                        '/<'.$esteticBoxLikeElements.'(.*)>/Usi',
243
-                        $htmlArray[$x]
244
-                    ) || preg_match(
245
-                        '/<'.$esteticBoxLikeElements.'(.*) \/>/Usi',
246
-                        $htmlArray[$x]
247
-                    ) // one element before is a element that has a line break after
248
-                    || preg_match('/<\/'.$esteticBoxLikeElements.'(.*)>/Usi', $htmlArray[$x - 1]) || '<!--' == substr(
249
-                        $htmlArray[$x - 1],
250
-                        0,
251
-                        4
252
-                    ) || preg_match('/<'.$esteticBoxLikeElements.'(.*) \/>/Usi', $htmlArray[$x - 1]))
253
-            ) {
254
-                $newline = true;
255
-            } elseif ($this->formatType >= 4 && ( // logical line break
256
-                    // this element has a line break before itself
257
-                    preg_match(
258
-                        '/<'.$allBoxLikeElements.'(.*)>/Usi',
259
-                        $htmlArray[$x]
260
-                    ) || preg_match(
261
-                        '/<'.$allBoxLikeElements.'(.*) \/>/Usi',
262
-                        $htmlArray[$x]
263
-                    ) // one element before is a element that has a line break after
264
-                    || preg_match('/<\/'.$allBoxLikeElements.'(.*)>/Usi', $htmlArray[$x - 1]) || '<!--' == substr(
265
-                        $htmlArray[$x - 1],
266
-                        0,
267
-                        4
268
-                    ) || preg_match('/<'.$allBoxLikeElements.'(.*) \/>/Usi', $htmlArray[$x - 1]))
269
-            ) {
270
-                $newline = true;
271
-            }
272
-
273
-            // count down a tab
274
-            if ('</' == substr($htmlArray[$x], 0, 2)) {
275
-                --$tabs;
276
-            }
277
-
278
-            // add tabs and line breaks in front of the current tag
279
-            if ($newline) {
280
-                $html .= $this->newline;
281
-                for ($y = 0; $y < $tabs; ++$y) {
282
-                    $html .= $this->tab;
283
-                }
284
-            }
285
-
286
-            // remove white spaces and line breaks and add current tag to the html-string
287
-            if ('<![CDATA[' == substr($htmlArray[$x], 0, 9) // remove multiple white space in CDATA / XML
288
-                || '<?xml' == substr($htmlArray[$x], 0, 5)
289
-            ) {
290
-                $html .= $this->killWhiteSpace($htmlArray[$x]);
291
-            } else { // remove all line breaks
292
-                $html .= $this->killLineBreaks($htmlArray[$x]);
293
-            }
294
-
295
-            // count up a tab
296
-            if ('<' == substr($htmlArray[$x], 0, 1) && '/' != substr($htmlArray[$x], 1, 1)) {
297
-                if (' ' !== substr($htmlArray[$x], 1, 1)
298
-                    && 'img' !== substr($htmlArray[$x], 1, 3)
299
-                    && 'source' !== substr($htmlArray[$x], 1, 6)
300
-                    && 'br' !== substr($htmlArray[$x], 1, 2)
301
-                    && 'hr' !== substr($htmlArray[$x], 1, 2)
302
-                    && 'input' !== substr($htmlArray[$x], 1, 5)
303
-                    && 'link' !== substr($htmlArray[$x], 1, 4)
304
-                    && 'meta' !== substr($htmlArray[$x], 1, 4)
305
-                    && 'col ' !== substr($htmlArray[$x], 1, 4)
306
-                    && 'frame' !== substr($htmlArray[$x], 1, 5)
307
-                    && 'isindex' !== substr($htmlArray[$x], 1, 7)
308
-                    && 'param' !== substr($htmlArray[$x], 1, 5)
309
-                    && 'area' !== substr($htmlArray[$x], 1, 4)
310
-                    && 'base' !== substr($htmlArray[$x], 1, 4)
311
-                    && '<!' !== substr($htmlArray[$x], 0, 2)
312
-                    && '<?xml' !== substr($htmlArray[$x], 0, 5)
313
-                ) {
314
-                    ++$tabs;
315
-                }
316
-            }
317
-        }
318
-
319
-        // Remove empty lines
320
-        if ($this->formatType > 1) {
321
-            $this->removeEmptyLines($html);
322
-        }
323
-
324
-        // Restore saved comments, styles and java-scripts
325
-        for ($i = 0; $i < \count($noFormat); ++$i) {
326
-            $html = str_replace("<!-- ELEMENT {$i} -->", $noFormat[$i], $html);
327
-        }
328
-
329
-        // include debug comment at the end
330
-        if (0 != $tabs && true === $this->debugComment) {
331
-            $html .= "<!-- {$tabs} open elements found -->";
332
-        }
333
-
334
-        return $html;
335
-    }
336
-
337
-    /**
338
-     * Remove ALL line breaks and multiple white space.
339
-     *
340
-     * @param string $html
341
-     *
342
-     * @return string
343
-     */
344
-    protected function killLineBreaks($html)
345
-    {
346
-        $html = str_replace($this->newline, '', $html);
347
-
348
-        return preg_replace('/\s\s+/u', ' ', $html);
349
-        //? return preg_replace('/\n|\s+(\s)/u', '$1', $html);
350
-    }
351
-
352
-    /**
353
-     * Remove multiple white space, keeps line breaks.
354
-     *
355
-     * @param string $html
356
-     *
357
-     * @return string
358
-     */
359
-    protected function killWhiteSpace($html)
360
-    {
361
-        $temp = explode($this->newline, $html);
362
-        for ($i = 0; $i < \count($temp); ++$i) {
363
-            if (!trim($temp[$i])) {
364
-                unset($temp[$i]);
365
-                continue;
366
-            }
367
-
368
-            $temp[$i] = trim($temp[$i]);
369
-            $temp[$i] = preg_replace('/\s\s+/', ' ', $temp[$i]);
370
-        }
371
-
372
-        return implode($this->newline, $temp);
373
-    }
374
-
375
-    /**
376
-     * Remove white space at the end of lines, keeps other white space and line breaks.
377
-     *
378
-     * @param string $html
379
-     *
380
-     * @return string
381
-     */
382
-    protected function rTrimLines(&$html)
383
-    {
384
-        $html = preg_replace('/\s+$/m', '', $html);
385
-    }
386
-
387
-    /**
388
-     * Convert newlines according to the current OS.
389
-     *
390
-     * @param string $html
391
-     *
392
-     * @return string
393
-     */
394
-    protected function convNlOs(&$html)
395
-    {
396
-        $html = preg_replace("(\r\n|\r)", $this->newline, $html);
397
-    }
398
-
399
-    /**
400
-     * Remove empty lines.
401
-     *
402
-     * @param string $html
403
-     */
404
-    protected function removeEmptyLines(&$html): void
405
-    {
406
-        $temp = explode($this->newline, $html);
407
-        $result = [];
408
-        for ($i = 0; $i < \count($temp); ++$i) {
409
-            if ('' == trim($temp[$i])) {
410
-                continue;
411
-            }
412
-            $result[] = $temp[$i];
413
-        }
414
-        $html = implode($this->newline, $result);
415
-    }
416
-
417
-    /**
418
-     * Include configured header comment in HTML content block.
419
-     *
420
-     * @param $html
421
-     */
422
-    public function includeHeaderComment(&$html): void
423
-    {
424
-        $html = preg_replace('/^(-->)$/m', "\n\t".$this->headerComment."\n$1", $html);
425
-    }
18
+	/**
19
+	 * Enable Debug comment in footer.
20
+	 *
21
+	 * @var bool
22
+	 */
23
+	protected $debugComment = false;
24
+
25
+	/**
26
+	 * Format Type.
27
+	 *
28
+	 * @var int
29
+	 */
30
+	protected $formatType = 0;
31
+
32
+	/**
33
+	 * Tab character.
34
+	 *
35
+	 * @var string
36
+	 */
37
+	protected $tab = "\t";
38
+
39
+	/**
40
+	 * Newline character.
41
+	 *
42
+	 * @var string
43
+	 */
44
+	protected $newline = "\n";
45
+
46
+	/**
47
+	 * Configured extra header comment.
48
+	 *
49
+	 * @var string
50
+	 */
51
+	protected $headerComment = '';
52
+
53
+	/**
54
+	 * Empty space char.
55
+	 *
56
+	 * @var string
57
+	 */
58
+	protected $emptySpaceChar = ' ';
59
+
60
+	/**
61
+	 * Set variables based on given config.
62
+	 */
63
+	public function setVariables(array $config): void
64
+	{
65
+		if (!empty($config)) {
66
+			if ($config['formatHtml'] && is_numeric($config['formatHtml'])) {
67
+				$this->formatType = (int) $config['formatHtml'];
68
+			}
69
+
70
+			if ($config['formatHtml.']['tabSize'] && is_numeric($config['formatHtml.']['tabSize'])) {
71
+				$this->tab = str_pad('', (int) $config['formatHtml.']['tabSize'], ' ');
72
+			}
73
+
74
+			if (isset($config['formatHtml.']['debugComment'])) {
75
+				$this->debugComment = (bool) $config['formatHtml.']['debugComment'];
76
+			}
77
+
78
+			if (isset($config['headerComment'])) {
79
+				$this->headerComment = $config['headerComment'];
80
+			}
81
+
82
+			if (isset($config['dropEmptySpaceChar']) && (bool) $config['dropEmptySpaceChar']) {
83
+				$this->emptySpaceChar = '';
84
+			}
85
+		}
86
+	}
87
+
88
+	/**
89
+	 * Clean given HTML with formatter.
90
+	 *
91
+	 * @param string $html
92
+	 * @param array  $config
93
+	 *
94
+	 * @return string
95
+	 */
96
+	public function clean($html, $config = [])
97
+	{
98
+		if (!empty($config)) {
99
+			$this->setVariables($config);
100
+		}
101
+		// convert line-breaks to UNIX
102
+		$this->convNlOs($html);
103
+
104
+		$manipulations = [];
105
+
106
+		if (isset($config['removeGenerator']) && (bool) $config['removeGenerator']) {
107
+			$manipulations['removeGenerator'] = GeneralUtility::makeInstance(RemoveGenerator::class);
108
+		}
109
+
110
+		if (isset($config['removeComments']) && (bool) $config['removeComments']) {
111
+			$manipulations['removeComments'] = GeneralUtility::makeInstance(RemoveComments::class);
112
+		}
113
+
114
+		if (!empty($this->headerComment)) {
115
+			$this->includeHeaderComment($html);
116
+		}
117
+
118
+		foreach ($manipulations as $key => $manipulation) {
119
+			/** @var ManipulationInterface $manipulation */
120
+			$configuration = isset($config[$key.'.']) && \is_array($config[$key.'.']) ? $config[$key.'.'] : [];
121
+			$html = $manipulation->manipulate($html, $configuration);
122
+		}
123
+
124
+		// cleanup HTML5 self-closing elements
125
+		if (!isset($GLOBALS['TSFE']->config['config']['doctype'])
126
+			|| 'x' !== substr($GLOBALS['TSFE']->config['config']['doctype'], 0, 1)) {
127
+			$html = preg_replace(
128
+				'/<((?:area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)\s[^>]+?)\s?\/>/',
129
+				'<$1>',
130
+				$html
131
+			);
132
+		}
133
+
134
+		if ($this->formatType > 0) {
135
+			$html = $this->formatHtml($html);
136
+		}
137
+		// remove white space after line ending
138
+		$this->rTrimLines($html);
139
+
140
+		// recover line-breaks
141
+		if (Environment::isWindows()) {
142
+			$html = str_replace($this->newline, "\r\n", $html);
143
+		}
144
+
145
+		return $html;
146
+	}
147
+
148
+	/**
149
+	 * Formats the (X)HTML code:
150
+	 *  - taps according to the hirarchy of the tags
151
+	 *  - removes empty spaces between tags
152
+	 *  - removes linebreaks within tags (spares where necessary: pre, textarea, comments, ..)
153
+	 *  choose from five options:
154
+	 *    0 => off
155
+	 *    1 => no line break at all  (code in one line)
156
+	 *    2 => minimalistic line breaks (structure defining box-elements)
157
+	 *    3 => aesthetic line breaks (important box-elements)
158
+	 *    4 => logic line breaks (all box-elements)
159
+	 *    5 => max line breaks (all elements).
160
+	 *
161
+	 * @param string $html
162
+	 *
163
+	 * @return string
164
+	 */
165
+	protected function formatHtml($html)
166
+	{
167
+		// Save original formated comments, pre, textarea, styles and java-scripts & replace them with markers
168
+		preg_match_all(
169
+			'/(?s)((<!--.*?-->)|(<[ \n\r]*pre[^>]*>.*?<[ \n\r]*\/pre[^>]*>)|(<[ \n\r]*textarea[^>]*>.*?<[ \n\r]*\/textarea[^>]*>)|(<[ \n\r]*style[^>]*>.*?<[ \n\r]*\/style[^>]*>)|(<[ \n\r]*script[^>]*>.*?<[ \n\r]*\/script[^>]*>))/im',
170
+			$html,
171
+			$matches
172
+		);
173
+		$noFormat = $matches[0]; // do not format these block elements
174
+		for ($i = 0; $i < \count($noFormat); ++$i) {
175
+			$html = str_replace($noFormat[$i], "\n<!-- ELEMENT {$i} -->", $html);
176
+		}
177
+
178
+		// define box elements for formatting
179
+		$trueBoxElements = 'address|blockquote|center|dir|div|dl|fieldset|form|h1|h2|h3|h4|h5|h6|hr|isindex|menu|noframes|noscript|ol|p|pre|table|ul|article|aside|details|figcaption|figure|footer|header|hgroup|menu|nav|section';
180
+		$functionalBoxElements = 'dd|dt|frameset|li|tbody|td|tfoot|th|thead|tr|colgroup';
181
+		$usableBoxElements = 'applet|button|del|iframe|ins|map|object|script';
182
+		$imagineBoxElements = 'html|body|head|meta|title|link|script|base|!--';
183
+		$allBoxLikeElements = '(?>'.$trueBoxElements.'|'.$functionalBoxElements.'|'.$usableBoxElements.'|'.$imagineBoxElements.')';
184
+		$esteticBoxLikeElements = '(?>html|head|body|meta name|title|div|table|h1|h2|h3|h4|h5|h6|p|form|pre|center|!--)';
185
+		$structureBoxLikeElements = '(?>html|head|body|div|!--)';
186
+
187
+		// split html into it's elements
188
+		$htmlArrayTemp = preg_split(
189
+			'/(<(?:[^<>]+(?:"[^"]*"|\'[^\']*\')?)+>)/',
190
+			$html,
191
+			-1,
192
+			\PREG_SPLIT_DELIM_CAPTURE | \PREG_SPLIT_NO_EMPTY
193
+		);
194
+
195
+		if (false === $htmlArrayTemp) {
196
+			// Restore saved comments, styles and java-scripts
197
+			for ($i = 0; $i < \count($noFormat); ++$i) {
198
+				$html = str_replace("<!-- ELEMENT {$i} -->", $noFormat[$i], $html);
199
+			}
200
+
201
+			return $html;
202
+		}
203
+		// remove empty lines
204
+		$htmlArray = [''];
205
+		$index = 1;
206
+		for ($x = 0; $x < \count($htmlArrayTemp); ++$x) {
207
+			$text = trim($htmlArrayTemp[$x]);
208
+			$htmlArray[$index] = '' !== $text ? $htmlArrayTemp[$x] : $this->emptySpaceChar;
209
+			++$index;
210
+		}
211
+
212
+		// rebuild html
213
+		$html = '';
214
+		$tabs = 0;
215
+		for ($x = 0; $x < \count($htmlArray); ++$x) {
216
+			// check if the element should stand in a new line
217
+			$newline = false;
218
+			if ('<?xml' == substr($htmlArray[$x - 1], 0, 5)) {
219
+				$newline = true;
220
+			} elseif (2 == $this->formatType && ( // minimalistic line break
221
+					// this element has a line break before itself
222
+					preg_match(
223
+						'/<'.$structureBoxLikeElements.'(.*)>/Usi',
224
+						$htmlArray[$x]
225
+					) || preg_match(
226
+						'/<'.$structureBoxLikeElements.'(.*) \/>/Usi',
227
+						$htmlArray[$x]
228
+					) // one element before is a element that has a line break after
229
+					|| preg_match(
230
+						'/<\/'.$structureBoxLikeElements.'(.*)>/Usi',
231
+						$htmlArray[$x - 1]
232
+					) || '<!--' == substr(
233
+						$htmlArray[$x - 1],
234
+						0,
235
+						4
236
+					) || preg_match('/<'.$structureBoxLikeElements.'(.*) \/>/Usi', $htmlArray[$x - 1]))
237
+			) {
238
+				$newline = true;
239
+			} elseif (3 == $this->formatType && ( // aestetic line break
240
+					// this element has a line break before itself
241
+					preg_match(
242
+						'/<'.$esteticBoxLikeElements.'(.*)>/Usi',
243
+						$htmlArray[$x]
244
+					) || preg_match(
245
+						'/<'.$esteticBoxLikeElements.'(.*) \/>/Usi',
246
+						$htmlArray[$x]
247
+					) // one element before is a element that has a line break after
248
+					|| preg_match('/<\/'.$esteticBoxLikeElements.'(.*)>/Usi', $htmlArray[$x - 1]) || '<!--' == substr(
249
+						$htmlArray[$x - 1],
250
+						0,
251
+						4
252
+					) || preg_match('/<'.$esteticBoxLikeElements.'(.*) \/>/Usi', $htmlArray[$x - 1]))
253
+			) {
254
+				$newline = true;
255
+			} elseif ($this->formatType >= 4 && ( // logical line break
256
+					// this element has a line break before itself
257
+					preg_match(
258
+						'/<'.$allBoxLikeElements.'(.*)>/Usi',
259
+						$htmlArray[$x]
260
+					) || preg_match(
261
+						'/<'.$allBoxLikeElements.'(.*) \/>/Usi',
262
+						$htmlArray[$x]
263
+					) // one element before is a element that has a line break after
264
+					|| preg_match('/<\/'.$allBoxLikeElements.'(.*)>/Usi', $htmlArray[$x - 1]) || '<!--' == substr(
265
+						$htmlArray[$x - 1],
266
+						0,
267
+						4
268
+					) || preg_match('/<'.$allBoxLikeElements.'(.*) \/>/Usi', $htmlArray[$x - 1]))
269
+			) {
270
+				$newline = true;
271
+			}
272
+
273
+			// count down a tab
274
+			if ('</' == substr($htmlArray[$x], 0, 2)) {
275
+				--$tabs;
276
+			}
277
+
278
+			// add tabs and line breaks in front of the current tag
279
+			if ($newline) {
280
+				$html .= $this->newline;
281
+				for ($y = 0; $y < $tabs; ++$y) {
282
+					$html .= $this->tab;
283
+				}
284
+			}
285
+
286
+			// remove white spaces and line breaks and add current tag to the html-string
287
+			if ('<![CDATA[' == substr($htmlArray[$x], 0, 9) // remove multiple white space in CDATA / XML
288
+				|| '<?xml' == substr($htmlArray[$x], 0, 5)
289
+			) {
290
+				$html .= $this->killWhiteSpace($htmlArray[$x]);
291
+			} else { // remove all line breaks
292
+				$html .= $this->killLineBreaks($htmlArray[$x]);
293
+			}
294
+
295
+			// count up a tab
296
+			if ('<' == substr($htmlArray[$x], 0, 1) && '/' != substr($htmlArray[$x], 1, 1)) {
297
+				if (' ' !== substr($htmlArray[$x], 1, 1)
298
+					&& 'img' !== substr($htmlArray[$x], 1, 3)
299
+					&& 'source' !== substr($htmlArray[$x], 1, 6)
300
+					&& 'br' !== substr($htmlArray[$x], 1, 2)
301
+					&& 'hr' !== substr($htmlArray[$x], 1, 2)
302
+					&& 'input' !== substr($htmlArray[$x], 1, 5)
303
+					&& 'link' !== substr($htmlArray[$x], 1, 4)
304
+					&& 'meta' !== substr($htmlArray[$x], 1, 4)
305
+					&& 'col ' !== substr($htmlArray[$x], 1, 4)
306
+					&& 'frame' !== substr($htmlArray[$x], 1, 5)
307
+					&& 'isindex' !== substr($htmlArray[$x], 1, 7)
308
+					&& 'param' !== substr($htmlArray[$x], 1, 5)
309
+					&& 'area' !== substr($htmlArray[$x], 1, 4)
310
+					&& 'base' !== substr($htmlArray[$x], 1, 4)
311
+					&& '<!' !== substr($htmlArray[$x], 0, 2)
312
+					&& '<?xml' !== substr($htmlArray[$x], 0, 5)
313
+				) {
314
+					++$tabs;
315
+				}
316
+			}
317
+		}
318
+
319
+		// Remove empty lines
320
+		if ($this->formatType > 1) {
321
+			$this->removeEmptyLines($html);
322
+		}
323
+
324
+		// Restore saved comments, styles and java-scripts
325
+		for ($i = 0; $i < \count($noFormat); ++$i) {
326
+			$html = str_replace("<!-- ELEMENT {$i} -->", $noFormat[$i], $html);
327
+		}
328
+
329
+		// include debug comment at the end
330
+		if (0 != $tabs && true === $this->debugComment) {
331
+			$html .= "<!-- {$tabs} open elements found -->";
332
+		}
333
+
334
+		return $html;
335
+	}
336
+
337
+	/**
338
+	 * Remove ALL line breaks and multiple white space.
339
+	 *
340
+	 * @param string $html
341
+	 *
342
+	 * @return string
343
+	 */
344
+	protected function killLineBreaks($html)
345
+	{
346
+		$html = str_replace($this->newline, '', $html);
347
+
348
+		return preg_replace('/\s\s+/u', ' ', $html);
349
+		//? return preg_replace('/\n|\s+(\s)/u', '$1', $html);
350
+	}
351
+
352
+	/**
353
+	 * Remove multiple white space, keeps line breaks.
354
+	 *
355
+	 * @param string $html
356
+	 *
357
+	 * @return string
358
+	 */
359
+	protected function killWhiteSpace($html)
360
+	{
361
+		$temp = explode($this->newline, $html);
362
+		for ($i = 0; $i < \count($temp); ++$i) {
363
+			if (!trim($temp[$i])) {
364
+				unset($temp[$i]);
365
+				continue;
366
+			}
367
+
368
+			$temp[$i] = trim($temp[$i]);
369
+			$temp[$i] = preg_replace('/\s\s+/', ' ', $temp[$i]);
370
+		}
371
+
372
+		return implode($this->newline, $temp);
373
+	}
374
+
375
+	/**
376
+	 * Remove white space at the end of lines, keeps other white space and line breaks.
377
+	 *
378
+	 * @param string $html
379
+	 *
380
+	 * @return string
381
+	 */
382
+	protected function rTrimLines(&$html)
383
+	{
384
+		$html = preg_replace('/\s+$/m', '', $html);
385
+	}
386
+
387
+	/**
388
+	 * Convert newlines according to the current OS.
389
+	 *
390
+	 * @param string $html
391
+	 *
392
+	 * @return string
393
+	 */
394
+	protected function convNlOs(&$html)
395
+	{
396
+		$html = preg_replace("(\r\n|\r)", $this->newline, $html);
397
+	}
398
+
399
+	/**
400
+	 * Remove empty lines.
401
+	 *
402
+	 * @param string $html
403
+	 */
404
+	protected function removeEmptyLines(&$html): void
405
+	{
406
+		$temp = explode($this->newline, $html);
407
+		$result = [];
408
+		for ($i = 0; $i < \count($temp); ++$i) {
409
+			if ('' == trim($temp[$i])) {
410
+				continue;
411
+			}
412
+			$result[] = $temp[$i];
413
+		}
414
+		$html = implode($this->newline, $result);
415
+	}
416
+
417
+	/**
418
+	 * Include configured header comment in HTML content block.
419
+	 *
420
+	 * @param $html
421
+	 */
422
+	public function includeHeaderComment(&$html): void
423
+	{
424
+		$html = preg_replace('/^(-->)$/m', "\n\t".$this->headerComment."\n$1", $html);
425
+	}
426 426
 }
Please login to merge, or discard this patch.