Completed
Push — master ( bf423b...0ddf15 )
by Tim
13:40
created
Classes/Service/CleanHtmlService.php 1 patch
Indentation   +411 added lines, -411 removed lines patch added patch discarded remove patch
@@ -15,415 +15,415 @@
 block discarded – undo
15 15
  */
16 16
 class CleanHtmlService implements SingletonInterface
 {
	/**
	 * Lower-case names of elements that never take a closing tag and therefore
	 * must not increase the indentation depth while re-building the markup.
	 */
	private const VOID_ELEMENTS = [
		'area', 'base', 'basefont', 'br', 'col', 'embed', 'frame', 'hr', 'img', 'input',
		'isindex', 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr',
	];

	/**
	 * Enable Debug comment in footer.
	 *
	 * @var bool
	 */
	protected $debugComment = false;

	/**
	 * Format Type (0 = off … 5 = max line breaks, see formatHtml()).
	 *
	 * @var int
	 */
	protected $formatType = 0;

	/**
	 * String used for one level of indentation.
	 *
	 * @var string
	 */
	protected $tab = "\t";

	/**
	 * Newline character used internally while processing.
	 *
	 * @var string
	 */
	protected $newline = "\n";

	/**
	 * Configured extra header comment.
	 *
	 * @var string
	 */
	protected $headerComment = '';

	/**
	 * Replacement for whitespace-only fragments between two tags.
	 *
	 * @var string
	 */
	protected $emptySpaceChar = ' ';

	/**
	 * Set variables based on given config.
	 *
	 * Only keys that are present are applied; everything else keeps its current value.
	 * Because the property values are stored on the instance they stay in effect for
	 * later calls until overwritten.
	 *
	 * @param array $config configuration array; the keys read here are 'formatHtml',
	 *                      'formatHtml.' (sub keys 'tabSize', 'debugComment'),
	 *                      'headerComment' and 'dropEmptySpaceChar'
	 */
	public function setVariables(array $config): void
	{
		if (empty($config)) {
			return;
		}

		// !empty() means "set and truthy", so a missing key no longer raises an
		// undefined-key warning while '0'/0 is still ignored exactly as before.
		if (!empty($config['formatHtml']) && is_numeric($config['formatHtml'])) {
			$this->formatType = (int) $config['formatHtml'];
		}

		if (!empty($config['formatHtml.']['tabSize']) && is_numeric($config['formatHtml.']['tabSize'])) {
			// a numeric tab size switches indentation from a tab to that many spaces
			$this->tab = str_pad('', (int) $config['formatHtml.']['tabSize'], ' ');
		}

		if (isset($config['formatHtml.']['debugComment'])) {
			$this->debugComment = (bool) $config['formatHtml.']['debugComment'];
		}

		if (isset($config['headerComment'])) {
			$this->headerComment = $config['headerComment'];
		}

		if (isset($config['dropEmptySpaceChar']) && (bool) $config['dropEmptySpaceChar']) {
			$this->emptySpaceChar = '';
		}
	}

	/**
	 * Clean given HTML with formatter.
	 *
	 * Steps, in order: apply config, normalise line breaks, insert the header comment,
	 * run the configured manipulations, turn self-closing void elements into plain
	 * tags (unless the configured doctype starts with "x"), format, strip trailing
	 * white space and finally restore Windows line breaks when running on Windows.
	 *
	 * @param string $html
	 * @param array  $config
	 *
	 * @return string
	 */
	public function clean($html, $config = [])
	{
		if (!empty($config)) {
			$this->setVariables($config);
		}
		// convert line-breaks to UNIX
		$this->convNlOs($html);

		$manipulations = [];

		if (isset($config['removeGenerator']) && (bool) $config['removeGenerator']) {
			$manipulations['removeGenerator'] = GeneralUtility::makeInstance(RemoveGenerator::class);
		}

		if (isset($config['removeComments']) && (bool) $config['removeComments']) {
			$manipulations['removeComments'] = GeneralUtility::makeInstance(RemoveComments::class);
		}

		if (!empty($this->headerComment)) {
			$this->includeHeaderComment($html);
		}

		foreach ($manipulations as $key => $manipulation) {
			/** @var ManipulationInterface $manipulation */
			// each manipulation receives its own sub-configuration stored under "<key>."
			$configuration = isset($config[$key.'.']) && \is_array($config[$key.'.']) ? $config[$key.'.'] : [];
			$html = $manipulation->manipulate($html, $configuration);
		}

		// cleanup HTML5 self-closing elements
		if (!isset($GLOBALS['TSFE']->config['config']['doctype'])
			|| 'x' !== substr($GLOBALS['TSFE']->config['config']['doctype'], 0, 1)) {
			// preg_replace() yields null on a PCRE error; keep the markup in that case
			$html = preg_replace(
				'/<((?:area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)\s[^>]+?)\s?\/>/',
				'<$1>',
				$html
			) ?? $html;
		}

		if ($this->formatType > 0) {
			$html = $this->formatHtml($html);
		}
		// remove white space after line ending
		$this->rTrimLines($html);

		// recover line-breaks
		if (Environment::isWindows()) {
			$html = str_replace($this->newline, "\r\n", $html);
		}

		return $html;
	}

	/**
	 * Formats the (X)HTML code:
	 *  - tabs according to the hierarchy of the tags
	 *  - removes empty spaces between tags
	 *  - removes linebreaks within tags (spares where necessary: pre, textarea, comments, ..)
	 *  choose from five options:
	 *    0 => off
	 *    1 => no line break at all  (code in one line)
	 *    2 => minimalistic line breaks (structure defining box-elements)
	 *    3 => aesthetic line breaks (important box-elements)
	 *    4 => logic line breaks (all box-elements)
	 *    5 => max line breaks (all elements; currently handled like 4).
	 *
	 * @param string $html
	 *
	 * @return string
	 */
	protected function formatHtml($html)
	{
		// Save original formatted comments, pre, textarea, styles and java-scripts & replace them
		// with markers. Every match is swapped in place in a single pass, so a comment that also
		// occurs inside a later <script>/<pre> block can no longer break that block's protection.
		$noFormat = []; // do not format these block elements
		$masked = preg_replace_callback(
			'/(?s)((<!--.*?-->)|(<[ \n\r]*pre[^>]*>.*?<[ \n\r]*\/pre[^>]*>)|(<[ \n\r]*textarea[^>]*>.*?<[ \n\r]*\/textarea[^>]*>)|(<[ \n\r]*style[^>]*>.*?<[ \n\r]*\/style[^>]*>)|(<[ \n\r]*script[^>]*>.*?<[ \n\r]*\/script[^>]*>))/im',
			static function (array $match) use (&$noFormat): string {
				$noFormat[] = $match[0];

				return "\n<!-- ELEMENT ".(\count($noFormat) - 1).' -->';
			},
			$html
		);
		if (null === $masked) {
			// PCRE gave up: formatting unprotected pre/script content would damage it,
			// so hand back the markup untouched.
			return $html;
		}
		$html = $masked;

		// define box elements for formatting
		$trueBoxElements = 'address|blockquote|center|dir|div|dl|fieldset|form|h1|h2|h3|h4|h5|h6|hr|isindex|menu|noframes|noscript|ol|p|pre|table|ul|article|aside|details|figcaption|figure|footer|header|hgroup|menu|nav|section';
		$functionalBoxElements = 'dd|dt|frameset|li|tbody|td|tfoot|th|thead|tr|colgroup';
		$usableBoxElements = 'applet|button|del|iframe|ins|map|object|script';
		$imagineBoxElements = 'html|body|head|meta|title|link|script|base|!--';
		$allBoxLikeElements = '(?>'.$trueBoxElements.'|'.$functionalBoxElements.'|'.$usableBoxElements.'|'.$imagineBoxElements.')';
		$esteticBoxLikeElements = '(?>html|head|body|meta name|title|div|table|h1|h2|h3|h4|h5|h6|p|form|pre|center|!--)';
		$structureBoxLikeElements = '(?>html|head|body|div|!--)';

		// pick the element set that triggers line breaks for the active format type;
		// null (type 1) means: no line breaks besides the one after an XML prolog
		$breakElements = null;
		if (2 == $this->formatType) { // minimalistic line break
			$breakElements = $structureBoxLikeElements;
		} elseif (3 == $this->formatType) { // aesthetic line break
			$breakElements = $esteticBoxLikeElements;
		} elseif ($this->formatType >= 4) { // logical line break
			$breakElements = $allBoxLikeElements;
		}

		// split html into its elements (tags are kept as separate tokens)
		$tokens = preg_split(
			'/(<(?:[^<>]+(?:"[^"]*"|\'[^\']*\')?)+>)/',
			$html,
			-1,
			\PREG_SPLIT_DELIM_CAPTURE | \PREG_SPLIT_NO_EMPTY
		);

		if (false === $tokens) {
			// Restore saved comments, styles and java-scripts
			return $this->restoreProtectedBlocks($html, $noFormat);
		}

		// rebuild html
		$html = '';
		$tabs = 0;
		$before = '';
		foreach ($tokens as $token) {
			// whitespace-only fragments collapse to the configured empty space char
			$current = '' !== trim($token) ? $token : $this->emptySpaceChar;

			// check if the element should stand in a new line
			$newline = '<?xml' === substr($before, 0, 5)
				|| (null !== $breakElements && $this->needsLineBreak($breakElements, $current, $before));

			// count down a tab
			if ('</' === substr($current, 0, 2)) {
				--$tabs;
			}

			// add tabs and line breaks in front of the current tag
			if ($newline) {
				// $tabs may be negative for unbalanced markup: indent by zero then
				$html .= $this->newline.str_repeat($this->tab, max(0, $tabs));
			}

			// remove white spaces and line breaks and add current tag to the html-string
			if ('<![CDATA[' === substr($current, 0, 9) // remove multiple white space in CDATA / XML
				|| '<?xml' === substr($current, 0, 5)
			) {
				$html .= $this->killWhiteSpace($current);
			} else { // remove all line breaks
				$html .= $this->killLineBreaks($current);
			}

			// count up a tab
			if ($this->opensElement($current)) {
				++$tabs;
			}

			$before = $current;
		}

		// Remove empty lines
		if ($this->formatType > 1) {
			$this->removeEmptyLines($html);
		}

		// Restore saved comments, styles and java-scripts
		$html = $this->restoreProtectedBlocks($html, $noFormat);

		// include debug comment at the end
		if (0 != $tabs && true === $this->debugComment) {
			$html .= "<!-- {$tabs} open elements found -->";
		}

		return $html;
	}

	/**
	 * Decide whether $current has to start on a new line.
	 *
	 * True when $current is an opening tag of one of $elements, or when $before is a
	 * closing tag / self-closing tag of one of $elements or a comment.
	 * (A separate "self-closing $current" test is not needed: every tag matching
	 * "<element… />" also matches "<element…>".)
	 *
	 * @param string $elements regex alternation (already wrapped in a group) of tag names
	 * @param string $current  fragment about to be written
	 * @param string $before   fragment written just before
	 */
	private function needsLineBreak(string $elements, string $current, string $before): bool
	{
		return 1 === preg_match('/<'.$elements.'(.*)>/Usi', $current)
			// one element before is a element that has a line break after
			|| 1 === preg_match('/<\/'.$elements.'(.*)>/Usi', $before)
			|| '<!--' === substr($before, 0, 4)
			|| 1 === preg_match('/<'.$elements.'(.*) \/>/Usi', $before);
	}

	/**
	 * Tell whether a fragment is an opening tag that increases the indentation depth.
	 *
	 * Closing tags, "< " fragments, "<!…" declarations/comments, the XML prolog and
	 * void elements do not. The tag name is compared as a whole and case-insensitively,
	 * so <frameset> or <link-card> are not mistaken for <frame> / <link>, and
	 * attribute-less void tags such as <col> or <wbr> are recognised.
	 */
	private function opensElement(string $fragment): bool
	{
		if ('' === $fragment || '<' !== $fragment[0]) {
			return false;
		}

		$second = substr($fragment, 1, 1);
		if ('/' === $second || ' ' === $second || '!' === $second || '<?xml' === substr($fragment, 0, 5)) {
			return false;
		}

		if (1 === preg_match('/^<([a-z][a-z0-9:-]*)/i', $fragment, $name)) {
			return !\in_array(strtolower($name[1]), self::VOID_ELEMENTS, true);
		}

		// anything else starting with "<" keeps counting as an opener, as it always did
		return true;
	}

	/**
	 * Put the blocks saved by formatHtml() back in place of their "<!-- ELEMENT n -->" markers.
	 *
	 * strtr() substitutes all markers in one pass and never rescans restored content.
	 *
	 * @param string   $html
	 * @param string[] $noFormat saved blocks, indexed by marker number
	 */
	private function restoreProtectedBlocks(string $html, array $noFormat): string
	{
		$markers = [];
		foreach ($noFormat as $number => $block) {
			$markers["<!-- ELEMENT {$number} -->"] = $block;
		}

		return [] === $markers ? $html : strtr($html, $markers);
	}

	/**
	 * Remove ALL line breaks and multiple white space.
	 *
	 * @param string $html
	 *
	 * @return string
	 */
	protected function killLineBreaks($html)
	{
		$html = str_replace($this->newline, '', $html);

		$collapsed = preg_replace('/\s\s+/u', ' ', $html);
		if (null === $collapsed) {
			// the /u modifier rejects invalid UTF-8; retry byte-wise instead of losing the fragment
			$collapsed = preg_replace('/\s\s+/', ' ', $html);
		}

		return $collapsed ?? $html;
	}

	/**
	 * Remove multiple white space, keeps line breaks.
	 *
	 * Every line is trimmed, runs of white space are collapsed to one blank and
	 * empty lines are dropped. A line consisting of "0" is content and is kept.
	 *
	 * @param string $html
	 *
	 * @return string
	 */
	protected function killWhiteSpace($html)
	{
		$lines = [];
		// foreach visits every line; the former count()-bound loop stopped early
		// whenever it had removed an empty line
		foreach (explode($this->newline, $html) as $line) {
			$line = trim($line);
			if ('' === $line) {
				continue;
			}

			$lines[] = preg_replace('/\s\s+/', ' ', $line) ?? $line;
		}

		return implode($this->newline, $lines);
	}

	/**
	 * Remove white space at the end of lines.
	 *
	 * Note: "\s" also matches line breaks, so runs of blank lines collapse as well.
	 *
	 * @param string $html modified in place
	 */
	protected function rTrimLines(&$html)
	{
		$trimmed = preg_replace('/\s+$/m', '', $html);
		// null signals a PCRE error; never replace the document with it
		if (null !== $trimmed) {
			$html = $trimmed;
		}
	}

	/**
	 * Normalise Windows (\r\n) and old Mac (\r) line breaks to the internal newline.
	 *
	 * @param string $html modified in place
	 */
	protected function convNlOs(&$html)
	{
		// "\r\n" is listed first so a Windows line break becomes one newline, not two
		$html = str_replace(["\r\n", "\r"], $this->newline, $html);
	}

	/**
	 * Remove empty lines.
	 *
	 * @param string $html modified in place
	 */
	protected function removeEmptyLines(&$html): void
	{
		$lines = array_filter(
			explode($this->newline, $html),
			static function (string $line): bool {
				return '' !== trim($line);
			}
		);
		$html = implode($this->newline, $lines);
	}

	/**
	 * Include configured header comment in HTML content block.
	 *
	 * The comment is inserted in front of every line that consists solely of "-->".
	 *
	 * @param string $html modified in place
	 */
	public function includeHeaderComment(&$html): void
	{
		$comment = $this->headerComment;
		// A callback keeps "$1" or "\1" inside the configured comment literal instead of
		// having preg_replace() expand it as a backreference.
		$result = preg_replace_callback(
			'/^(-->)$/m',
			static function (array $match) use ($comment): string {
				return "\n\t".$comment."\n".$match[1];
			},
			$html
		);
		if (null !== $result) {
			$html = $result;
		}
	}
 }
Please login to merge, or discard this patch.