Completed
Push — master ( d03d2a...1a1551 )
by Tim
24:35 queued 09:33
created
Classes/Service/CleanHtmlService.php 2 patches
Indentation   +370 added lines, -370 removed lines patch added patch discarded remove patch
@@ -17,374 +17,374 @@
 block discarded – undo
17 17
  */
18 18
 class CleanHtmlService implements SingletonInterface
19 19
 {
20
-    /**
21
-     * Enable Debug comment in footer.
22
-     */
23
-    protected bool $debugComment = false;
24
-
25
-    /**
26
-     * Format Type.
27
-     */
28
-    protected int $formatType = 0;
29
-
30
-    /**
31
-     * Tab character.
32
-     */
33
-    protected string $tab = "\t";
34
-
35
-    /**
36
-     * Newline character.
37
-     */
38
-    protected string $newline = "\n";
39
-
40
-    /**
41
-     * Configured extra header comment.
42
-     */
43
-    protected string $headerComment = '';
44
-
45
-    /**
46
-     * Empty space char.
47
-     */
48
-    protected string $emptySpaceChar = ' ';
49
-
50
-    /**
51
-     * Set variables based on given config.
52
-     */
53
-    public function setVariables(array $config): void
54
-    {
55
-        if (isset($config['headerComment']) && !empty($config['headerComment'])) {
56
-            $this->headerComment = $config['headerComment'];
57
-        }
58
-
59
-        if (isset($config['formatHtml']) && is_numeric($config['formatHtml'])) {
60
-            $this->formatType = (int) $config['formatHtml'];
61
-        }
62
-
63
-        if (isset($config['formatHtml.']['tabSize']) && is_numeric($config['formatHtml.']['tabSize'])) {
64
-            $this->tab = str_pad('', (int) $config['formatHtml.']['tabSize'], ' ');
65
-        }
66
-
67
-        if (isset($config['formatHtml.']['debugComment'])) {
68
-            $this->debugComment = (bool) $config['formatHtml.']['debugComment'];
69
-        }
70
-
71
-        if (isset($config['dropEmptySpaceChar']) && (bool) $config['dropEmptySpaceChar']) {
72
-            $this->emptySpaceChar = '';
73
-        }
74
-    }
75
-
76
-    /**
77
-     * Clean given HTML with formatter.
78
-     */
79
-    public function clean(string $html, array $config = []): string
80
-    {
81
-        if (!empty($config)) {
82
-            $this->setVariables($config);
83
-        }
84
-
85
-        // convert line-breaks to UNIX
86
-        $this->convNlOs($html);
87
-
88
-        $manipulations = [];
89
-
90
-        if (isset($config['removeGenerator']) && (bool) $config['removeGenerator']) {
91
-            $manipulations['removeGenerator'] = GeneralUtility::makeInstance(RemoveGenerator::class);
92
-        }
93
-
94
-        if (isset($config['removeComments']) && (bool) $config['removeComments']) {
95
-            $manipulations['removeComments'] = GeneralUtility::makeInstance(RemoveComments::class);
96
-        }
97
-
98
-        if (!empty($this->headerComment)) {
99
-            $this->includeHeaderComment($html);
100
-        }
101
-
102
-        foreach ($manipulations as $key => $manipulation) {
103
-            /** @var ManipulationInterface $manipulation */
104
-            $configuration = isset($config[$key . '.']) && \is_array($config[$key . '.']) ? $config[$key . '.'] : [];
105
-            $html = $manipulation->manipulate($html, $configuration);
106
-        }
107
-
108
-        // cleanup HTML5 self-closing elements
109
-        if (!isset($GLOBALS['TSFE']->config['config']['doctype'])
110
-            || 'x' !== substr($GLOBALS['TSFE']->config['config']['doctype'], 0, 1)) {
111
-            $html = preg_replace(
112
-                '/<((?:area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)\s[^>]+?)\s*\\\?\/>/',
113
-                '<$1>',
114
-                $html
115
-            );
116
-        }
117
-
118
-        if ($this->formatType > 0) {
119
-            $html = $this->formatHtml($html);
120
-        }
121
-
122
-        // remove white space after line ending
123
-        $this->rTrimLines($html);
124
-
125
-        // recover line-breaks
126
-        if (Environment::isWindows()) {
127
-            $html = str_replace($this->newline, "\r\n", $html);
128
-        }
129
-
130
-        return (string) $html;
131
-    }
132
-
133
-    /**
134
-     * Formats the (X)HTML code:
135
-     *  - taps according to the hirarchy of the tags
136
-     *  - removes empty spaces between tags
137
-     *  - removes linebreaks within tags (spares where necessary: pre, textarea, comments, ..)
138
-     *  choose from five options:
139
-     *    0 => off
140
-     *    1 => no line break at all  (code in one line)
141
-     *    2 => minimalistic line breaks (structure defining box-elements)
142
-     *    3 => aesthetic line breaks (important box-elements)
143
-     *    4 => logic line breaks (all box-elements)
144
-     *    5 => max line breaks (all elements).
145
-     */
146
-    protected function formatHtml(string $html): string
147
-    {
148
-        // Save original formated pre, textarea, comments, styles and scripts & replace them with markers
149
-        preg_match_all(
150
-            '/(?s)((<!--.*?-->)|(<[ \n\r]*pre[^>]*>.*?<[ \n\r]*\/pre[^>]*>)|(<[ \n\r]*textarea[^>]*>.*?<[ \n\r]*\/textarea[^>]*>)|(<[ \n\r]*style[^>]*>.*?<[ \n\r]*\/style[^>]*>)|(<[ \n\r]*script[^>]*>.*?<[ \n\r]*\/script[^>]*>))/im',
151
-            $html,
152
-            $matches
153
-        );
154
-        $noFormat = $matches[0]; // do not format these block elements
155
-        for ($i = 0; $i < \count($noFormat); ++$i) {
156
-            $html = str_replace($noFormat[$i], "\n<!-- ELEMENT {$i} -->", $html);
157
-        }
158
-
159
-        // define box elements for formatting
160
-        $trueBoxElements = 'address|blockquote|center|dir|div|dl|fieldset|form|h1|h2|h3|h4|h5|h6|hr|isindex|menu|noframes|noscript|ol|p|pre|table|ul|article|aside|details|figcaption|figure|footer|header|hgroup|menu|nav|section';
161
-        $functionalBoxElements = 'dd|dt|frameset|li|tbody|td|tfoot|th|thead|tr|colgroup';
162
-        $usableBoxElements = 'applet|button|del|iframe|ins|map|object|script';
163
-        $imagineBoxElements = 'html|body|head|meta|title|link|script|base|!--';
164
-        $allBoxLikeElements = '(?>' . $trueBoxElements . '|' . $functionalBoxElements . '|' . $usableBoxElements . '|' . $imagineBoxElements . ')';
165
-        $esteticBoxLikeElements = '(?>html|head|body|meta name|title|div|table|h1|h2|h3|h4|h5|h6|p|form|pre|center|!--)';
166
-        $structureBoxLikeElements = '(?>html|head|body|div|!--)';
167
-
168
-        // split html into it's elements
169
-        $htmlArrayTemp = preg_split(
170
-            '/(<(?:[^<>]+(?:"[^"]*"|\'[^\']*\')?)+>)/',
171
-            $html,
172
-            -1,
173
-            \PREG_SPLIT_DELIM_CAPTURE | \PREG_SPLIT_NO_EMPTY
174
-        );
175
-
176
-        if (false === $htmlArrayTemp) {
177
-            // Restore saved comments, styles and scripts
178
-            for ($i = 0; $i < \count($noFormat); ++$i) {
179
-                $html = str_replace("<!-- ELEMENT {$i} -->", $noFormat[$i], $html);
180
-            }
181
-
182
-            return $html;
183
-        }
184
-        // remove empty lines
185
-        $htmlArray = [''];
186
-        $index = 1;
187
-        for ($x = 0; $x < \count($htmlArrayTemp); ++$x) {
188
-            $text = trim($htmlArrayTemp[$x]);
189
-            $htmlArray[$index] = '' !== $text ? $htmlArrayTemp[$x] : $this->emptySpaceChar;
190
-            ++$index;
191
-        }
192
-
193
-        // rebuild html
194
-        $html = '';
195
-        $tabs = 0;
196
-        for ($x = 0; $x < \count($htmlArray); ++$x) {
197
-            $htmlArrayBefore = $htmlArray[$x - 1] ?? '';
198
-            $htmlArrayCurrent = $htmlArray[$x] ?? '';
199
-
200
-            // check if the element should stand in a new line
201
-            $newline = false;
202
-            if ('<?xml' == substr($htmlArrayBefore, 0, 5)) {
203
-                $newline = true;
204
-            } elseif (2 == $this->formatType && ( // minimalistic line break
205
-                // this element has a line break before itself
206
-                preg_match(
207
-                    '/<' . $structureBoxLikeElements . '(.*)>/Usi',
208
-                    $htmlArrayCurrent
209
-                ) || preg_match(
210
-                    '/<' . $structureBoxLikeElements . '(.*) \/>/Usi',
211
-                    $htmlArrayCurrent
212
-                ) // one element before is a element that has a line break after
213
-                || preg_match(
214
-                    '/<\/' . $structureBoxLikeElements . '(.*)>/Usi',
215
-                    $htmlArrayBefore
216
-                ) || '<!--' == substr(
217
-                    $htmlArrayBefore,
218
-                    0,
219
-                    4
220
-                ) || preg_match('/<' . $structureBoxLikeElements . '(.*) \/>/Usi', $htmlArrayBefore))
221
-            ) {
222
-                $newline = true;
223
-            } elseif (3 == $this->formatType && ( // aestetic line break
224
-                // this element has a line break before itself
225
-                preg_match(
226
-                    '/<' . $esteticBoxLikeElements . '(.*)>/Usi',
227
-                    $htmlArrayCurrent
228
-                ) || preg_match(
229
-                    '/<' . $esteticBoxLikeElements . '(.*) \/>/Usi',
230
-                    $htmlArrayCurrent
231
-                ) // one element before is a element that has a line break after
232
-                || preg_match('/<\/' . $esteticBoxLikeElements . '(.*)>/Usi', $htmlArrayBefore) || '<!--' == substr(
233
-                    $htmlArrayBefore,
234
-                    0,
235
-                    4
236
-                ) || preg_match('/<' . $esteticBoxLikeElements . '(.*) \/>/Usi', $htmlArrayBefore))
237
-            ) {
238
-                $newline = true;
239
-            } elseif ($this->formatType >= 4 && ( // logical line break
240
-                // this element has a line break before itself
241
-                preg_match(
242
-                    '/<' . $allBoxLikeElements . '(.*)>/Usi',
243
-                    $htmlArrayCurrent
244
-                ) || preg_match(
245
-                    '/<' . $allBoxLikeElements . '(.*) \/>/Usi',
246
-                    $htmlArrayCurrent
247
-                ) // one element before is a element that has a line break after
248
-                || preg_match('/<\/' . $allBoxLikeElements . '(.*)>/Usi', $htmlArrayBefore) || '<!--' == substr(
249
-                    $htmlArrayBefore,
250
-                    0,
251
-                    4
252
-                ) || preg_match('/<' . $allBoxLikeElements . '(.*) \/>/Usi', $htmlArrayBefore))
253
-            ) {
254
-                $newline = true;
255
-            }
256
-
257
-            // count down a tab
258
-            if ('</' == substr($htmlArrayCurrent, 0, 2)) {
259
-                --$tabs;
260
-            }
261
-
262
-            // add tabs and line breaks in front of the current tag
263
-            if ($newline) {
264
-                $html .= $this->newline;
265
-                for ($y = 0; $y < $tabs; ++$y) {
266
-                    $html .= $this->tab;
267
-                }
268
-            }
269
-
270
-            // remove white spaces and line breaks and add current tag to the html-string
271
-            if ('<![CDATA[' == substr($htmlArrayCurrent, 0, 9) // remove multiple white space in CDATA / XML
272
-                || '<?xml' == substr($htmlArrayCurrent, 0, 5)
273
-            ) {
274
-                $html .= $this->killWhiteSpace($htmlArrayCurrent);
275
-            } else { // remove all line breaks
276
-                $html .= $this->killLineBreaks($htmlArrayCurrent);
277
-            }
278
-
279
-            // count up a tab
280
-            if ('<' == substr($htmlArrayCurrent, 0, 1) && '/' != substr($htmlArrayCurrent, 1, 1)) {
281
-                if (' ' !== substr($htmlArrayCurrent, 1, 1)
282
-                    && 'img' !== substr($htmlArrayCurrent, 1, 3)
283
-                    && 'source' !== substr($htmlArrayCurrent, 1, 6)
284
-                    && 'br' !== substr($htmlArrayCurrent, 1, 2)
285
-                    && 'hr' !== substr($htmlArrayCurrent, 1, 2)
286
-                    && 'input' !== substr($htmlArrayCurrent, 1, 5)
287
-                    && 'link' !== substr($htmlArrayCurrent, 1, 4)
288
-                    && 'meta' !== substr($htmlArrayCurrent, 1, 4)
289
-                    && 'col ' !== substr($htmlArrayCurrent, 1, 4)
290
-                    && 'frame' !== substr($htmlArrayCurrent, 1, 5)
291
-                    && 'isindex' !== substr($htmlArrayCurrent, 1, 7)
292
-                    && 'param' !== substr($htmlArrayCurrent, 1, 5)
293
-                    && 'area' !== substr($htmlArrayCurrent, 1, 4)
294
-                    && 'base' !== substr($htmlArrayCurrent, 1, 4)
295
-                    && '<!' !== substr($htmlArrayCurrent, 0, 2)
296
-                    && '<?xml' !== substr($htmlArrayCurrent, 0, 5)
297
-                ) {
298
-                    ++$tabs;
299
-                }
300
-            }
301
-        }
302
-
303
-        // Remove empty lines
304
-        if ($this->formatType > 1) {
305
-            $this->removeEmptyLines($html);
306
-        }
307
-
308
-        // Restore saved comments, styles and scripts
309
-        for ($i = 0; $i < \count($noFormat); ++$i) {
310
-            $html = str_replace("<!-- ELEMENT {$i} -->", $noFormat[$i], $html);
311
-        }
312
-
313
-        // include debug comment at the end
314
-        if (0 != $tabs && true === $this->debugComment) {
315
-            $html .= "<!-- {$tabs} open elements found -->";
316
-        }
317
-
318
-        return $html;
319
-    }
320
-
321
-    /**
322
-     * Remove ALL line breaks and multiple white space.
323
-     */
324
-    protected function killLineBreaks(string $html): string
325
-    {
326
-        $html = str_replace($this->newline, '', $html);
327
-
328
-        return preg_replace('/\s\s+/u', ' ', $html);
329
-        // ? return preg_replace('/\n|\s+(\s)/u', '$1', $html);
330
-    }
331
-
332
-    /**
333
-     * Remove multiple white space, keeps line breaks.
334
-     */
335
-    protected function killWhiteSpace(string $html): string
336
-    {
337
-        $temp = explode($this->newline, $html);
338
-        for ($i = 0; $i < \count($temp); ++$i) {
339
-            if (!trim($temp[$i])) {
340
-                unset($temp[$i]);
341
-                continue;
342
-            }
343
-
344
-            $temp[$i] = trim($temp[$i]);
345
-            $temp[$i] = preg_replace('/\s\s+/', ' ', $temp[$i]);
346
-        }
347
-
348
-        return implode($this->newline, $temp);
349
-    }
350
-
351
-    /**
352
-     * Remove white space at the end of lines, keeps other white space and line breaks.
353
-     */
354
-    protected function rTrimLines(string &$html): void
355
-    {
356
-        $html = preg_replace('/\s+$/m', '', $html);
357
-    }
358
-
359
-    /**
360
-     * Convert newlines according to the current OS.
361
-     */
362
-    protected function convNlOs(string &$html): void
363
-    {
364
-        $html = preg_replace("(\r\n|\r)", $this->newline, $html);
365
-    }
366
-
367
-    /**
368
-     * Remove empty lines.
369
-     */
370
-    protected function removeEmptyLines(string &$html): void
371
-    {
372
-        $temp = explode($this->newline, $html);
373
-        $result = [];
374
-        for ($i = 0; $i < \count($temp); ++$i) {
375
-            if ('' == trim($temp[$i])) {
376
-                continue;
377
-            }
378
-            $result[] = $temp[$i];
379
-        }
380
-        $html = implode($this->newline, $result);
381
-    }
382
-
383
-    /**
384
-     * Include configured header comment in HTML content block.
385
-     */
386
-    public function includeHeaderComment(string &$html): void
387
-    {
388
-        $html = preg_replace('/^(-->)$/m', "\n\t" . $this->headerComment . "\n$1", $html);
389
-    }
20
+	/**
21
+	 * Enable Debug comment in footer.
22
+	 */
23
+	protected bool $debugComment = false;
24
+
25
+	/**
26
+	 * Format Type.
27
+	 */
28
+	protected int $formatType = 0;
29
+
30
+	/**
31
+	 * Tab character.
32
+	 */
33
+	protected string $tab = "\t";
34
+
35
+	/**
36
+	 * Newline character.
37
+	 */
38
+	protected string $newline = "\n";
39
+
40
+	/**
41
+	 * Configured extra header comment.
42
+	 */
43
+	protected string $headerComment = '';
44
+
45
+	/**
46
+	 * Empty space char.
47
+	 */
48
+	protected string $emptySpaceChar = ' ';
49
+
50
+	/**
51
+	 * Set variables based on given config.
52
+	 */
53
+	public function setVariables(array $config): void
54
+	{
55
+		if (isset($config['headerComment']) && !empty($config['headerComment'])) {
56
+			$this->headerComment = $config['headerComment'];
57
+		}
58
+
59
+		if (isset($config['formatHtml']) && is_numeric($config['formatHtml'])) {
60
+			$this->formatType = (int) $config['formatHtml'];
61
+		}
62
+
63
+		if (isset($config['formatHtml.']['tabSize']) && is_numeric($config['formatHtml.']['tabSize'])) {
64
+			$this->tab = str_pad('', (int) $config['formatHtml.']['tabSize'], ' ');
65
+		}
66
+
67
+		if (isset($config['formatHtml.']['debugComment'])) {
68
+			$this->debugComment = (bool) $config['formatHtml.']['debugComment'];
69
+		}
70
+
71
+		if (isset($config['dropEmptySpaceChar']) && (bool) $config['dropEmptySpaceChar']) {
72
+			$this->emptySpaceChar = '';
73
+		}
74
+	}
75
+
76
+	/**
77
+	 * Clean given HTML with formatter.
78
+	 */
79
+	public function clean(string $html, array $config = []): string
80
+	{
81
+		if (!empty($config)) {
82
+			$this->setVariables($config);
83
+		}
84
+
85
+		// convert line-breaks to UNIX
86
+		$this->convNlOs($html);
87
+
88
+		$manipulations = [];
89
+
90
+		if (isset($config['removeGenerator']) && (bool) $config['removeGenerator']) {
91
+			$manipulations['removeGenerator'] = GeneralUtility::makeInstance(RemoveGenerator::class);
92
+		}
93
+
94
+		if (isset($config['removeComments']) && (bool) $config['removeComments']) {
95
+			$manipulations['removeComments'] = GeneralUtility::makeInstance(RemoveComments::class);
96
+		}
97
+
98
+		if (!empty($this->headerComment)) {
99
+			$this->includeHeaderComment($html);
100
+		}
101
+
102
+		foreach ($manipulations as $key => $manipulation) {
103
+			/** @var ManipulationInterface $manipulation */
104
+			$configuration = isset($config[$key . '.']) && \is_array($config[$key . '.']) ? $config[$key . '.'] : [];
105
+			$html = $manipulation->manipulate($html, $configuration);
106
+		}
107
+
108
+		// cleanup HTML5 self-closing elements
109
+		if (!isset($GLOBALS['TSFE']->config['config']['doctype'])
110
+			|| 'x' !== substr($GLOBALS['TSFE']->config['config']['doctype'], 0, 1)) {
111
+			$html = preg_replace(
112
+				'/<((?:area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)\s[^>]+?)\s*\\\?\/>/',
113
+				'<$1>',
114
+				$html
115
+			);
116
+		}
117
+
118
+		if ($this->formatType > 0) {
119
+			$html = $this->formatHtml($html);
120
+		}
121
+
122
+		// remove white space after line ending
123
+		$this->rTrimLines($html);
124
+
125
+		// recover line-breaks
126
+		if (Environment::isWindows()) {
127
+			$html = str_replace($this->newline, "\r\n", $html);
128
+		}
129
+
130
+		return (string) $html;
131
+	}
132
+
133
+	/**
134
+	 * Formats the (X)HTML code:
135
+	 *  - taps according to the hirarchy of the tags
136
+	 *  - removes empty spaces between tags
137
+	 *  - removes linebreaks within tags (spares where necessary: pre, textarea, comments, ..)
138
+	 *  choose from five options:
139
+	 *    0 => off
140
+	 *    1 => no line break at all  (code in one line)
141
+	 *    2 => minimalistic line breaks (structure defining box-elements)
142
+	 *    3 => aesthetic line breaks (important box-elements)
143
+	 *    4 => logic line breaks (all box-elements)
144
+	 *    5 => max line breaks (all elements).
145
+	 */
146
+	protected function formatHtml(string $html): string
147
+	{
148
+		// Save original formated pre, textarea, comments, styles and scripts & replace them with markers
149
+		preg_match_all(
150
+			'/(?s)((<!--.*?-->)|(<[ \n\r]*pre[^>]*>.*?<[ \n\r]*\/pre[^>]*>)|(<[ \n\r]*textarea[^>]*>.*?<[ \n\r]*\/textarea[^>]*>)|(<[ \n\r]*style[^>]*>.*?<[ \n\r]*\/style[^>]*>)|(<[ \n\r]*script[^>]*>.*?<[ \n\r]*\/script[^>]*>))/im',
151
+			$html,
152
+			$matches
153
+		);
154
+		$noFormat = $matches[0]; // do not format these block elements
155
+		for ($i = 0; $i < \count($noFormat); ++$i) {
156
+			$html = str_replace($noFormat[$i], "\n<!-- ELEMENT {$i} -->", $html);
157
+		}
158
+
159
+		// define box elements for formatting
160
+		$trueBoxElements = 'address|blockquote|center|dir|div|dl|fieldset|form|h1|h2|h3|h4|h5|h6|hr|isindex|menu|noframes|noscript|ol|p|pre|table|ul|article|aside|details|figcaption|figure|footer|header|hgroup|menu|nav|section';
161
+		$functionalBoxElements = 'dd|dt|frameset|li|tbody|td|tfoot|th|thead|tr|colgroup';
162
+		$usableBoxElements = 'applet|button|del|iframe|ins|map|object|script';
163
+		$imagineBoxElements = 'html|body|head|meta|title|link|script|base|!--';
164
+		$allBoxLikeElements = '(?>' . $trueBoxElements . '|' . $functionalBoxElements . '|' . $usableBoxElements . '|' . $imagineBoxElements . ')';
165
+		$esteticBoxLikeElements = '(?>html|head|body|meta name|title|div|table|h1|h2|h3|h4|h5|h6|p|form|pre|center|!--)';
166
+		$structureBoxLikeElements = '(?>html|head|body|div|!--)';
167
+
168
+		// split html into it's elements
169
+		$htmlArrayTemp = preg_split(
170
+			'/(<(?:[^<>]+(?:"[^"]*"|\'[^\']*\')?)+>)/',
171
+			$html,
172
+			-1,
173
+			\PREG_SPLIT_DELIM_CAPTURE | \PREG_SPLIT_NO_EMPTY
174
+		);
175
+
176
+		if (false === $htmlArrayTemp) {
177
+			// Restore saved comments, styles and scripts
178
+			for ($i = 0; $i < \count($noFormat); ++$i) {
179
+				$html = str_replace("<!-- ELEMENT {$i} -->", $noFormat[$i], $html);
180
+			}
181
+
182
+			return $html;
183
+		}
184
+		// remove empty lines
185
+		$htmlArray = [''];
186
+		$index = 1;
187
+		for ($x = 0; $x < \count($htmlArrayTemp); ++$x) {
188
+			$text = trim($htmlArrayTemp[$x]);
189
+			$htmlArray[$index] = '' !== $text ? $htmlArrayTemp[$x] : $this->emptySpaceChar;
190
+			++$index;
191
+		}
192
+
193
+		// rebuild html
194
+		$html = '';
195
+		$tabs = 0;
196
+		for ($x = 0; $x < \count($htmlArray); ++$x) {
197
+			$htmlArrayBefore = $htmlArray[$x - 1] ?? '';
198
+			$htmlArrayCurrent = $htmlArray[$x] ?? '';
199
+
200
+			// check if the element should stand in a new line
201
+			$newline = false;
202
+			if ('<?xml' == substr($htmlArrayBefore, 0, 5)) {
203
+				$newline = true;
204
+			} elseif (2 == $this->formatType && ( // minimalistic line break
205
+				// this element has a line break before itself
206
+				preg_match(
207
+					'/<' . $structureBoxLikeElements . '(.*)>/Usi',
208
+					$htmlArrayCurrent
209
+				) || preg_match(
210
+					'/<' . $structureBoxLikeElements . '(.*) \/>/Usi',
211
+					$htmlArrayCurrent
212
+				) // one element before is a element that has a line break after
213
+				|| preg_match(
214
+					'/<\/' . $structureBoxLikeElements . '(.*)>/Usi',
215
+					$htmlArrayBefore
216
+				) || '<!--' == substr(
217
+					$htmlArrayBefore,
218
+					0,
219
+					4
220
+				) || preg_match('/<' . $structureBoxLikeElements . '(.*) \/>/Usi', $htmlArrayBefore))
221
+			) {
222
+				$newline = true;
223
+			} elseif (3 == $this->formatType && ( // aestetic line break
224
+				// this element has a line break before itself
225
+				preg_match(
226
+					'/<' . $esteticBoxLikeElements . '(.*)>/Usi',
227
+					$htmlArrayCurrent
228
+				) || preg_match(
229
+					'/<' . $esteticBoxLikeElements . '(.*) \/>/Usi',
230
+					$htmlArrayCurrent
231
+				) // one element before is a element that has a line break after
232
+				|| preg_match('/<\/' . $esteticBoxLikeElements . '(.*)>/Usi', $htmlArrayBefore) || '<!--' == substr(
233
+					$htmlArrayBefore,
234
+					0,
235
+					4
236
+				) || preg_match('/<' . $esteticBoxLikeElements . '(.*) \/>/Usi', $htmlArrayBefore))
237
+			) {
238
+				$newline = true;
239
+			} elseif ($this->formatType >= 4 && ( // logical line break
240
+				// this element has a line break before itself
241
+				preg_match(
242
+					'/<' . $allBoxLikeElements . '(.*)>/Usi',
243
+					$htmlArrayCurrent
244
+				) || preg_match(
245
+					'/<' . $allBoxLikeElements . '(.*) \/>/Usi',
246
+					$htmlArrayCurrent
247
+				) // one element before is a element that has a line break after
248
+				|| preg_match('/<\/' . $allBoxLikeElements . '(.*)>/Usi', $htmlArrayBefore) || '<!--' == substr(
249
+					$htmlArrayBefore,
250
+					0,
251
+					4
252
+				) || preg_match('/<' . $allBoxLikeElements . '(.*) \/>/Usi', $htmlArrayBefore))
253
+			) {
254
+				$newline = true;
255
+			}
256
+
257
+			// count down a tab
258
+			if ('</' == substr($htmlArrayCurrent, 0, 2)) {
259
+				--$tabs;
260
+			}
261
+
262
+			// add tabs and line breaks in front of the current tag
263
+			if ($newline) {
264
+				$html .= $this->newline;
265
+				for ($y = 0; $y < $tabs; ++$y) {
266
+					$html .= $this->tab;
267
+				}
268
+			}
269
+
270
+			// remove white spaces and line breaks and add current tag to the html-string
271
+			if ('<![CDATA[' == substr($htmlArrayCurrent, 0, 9) // remove multiple white space in CDATA / XML
272
+				|| '<?xml' == substr($htmlArrayCurrent, 0, 5)
273
+			) {
274
+				$html .= $this->killWhiteSpace($htmlArrayCurrent);
275
+			} else { // remove all line breaks
276
+				$html .= $this->killLineBreaks($htmlArrayCurrent);
277
+			}
278
+
279
+			// count up a tab
280
+			if ('<' == substr($htmlArrayCurrent, 0, 1) && '/' != substr($htmlArrayCurrent, 1, 1)) {
281
+				if (' ' !== substr($htmlArrayCurrent, 1, 1)
282
+					&& 'img' !== substr($htmlArrayCurrent, 1, 3)
283
+					&& 'source' !== substr($htmlArrayCurrent, 1, 6)
284
+					&& 'br' !== substr($htmlArrayCurrent, 1, 2)
285
+					&& 'hr' !== substr($htmlArrayCurrent, 1, 2)
286
+					&& 'input' !== substr($htmlArrayCurrent, 1, 5)
287
+					&& 'link' !== substr($htmlArrayCurrent, 1, 4)
288
+					&& 'meta' !== substr($htmlArrayCurrent, 1, 4)
289
+					&& 'col ' !== substr($htmlArrayCurrent, 1, 4)
290
+					&& 'frame' !== substr($htmlArrayCurrent, 1, 5)
291
+					&& 'isindex' !== substr($htmlArrayCurrent, 1, 7)
292
+					&& 'param' !== substr($htmlArrayCurrent, 1, 5)
293
+					&& 'area' !== substr($htmlArrayCurrent, 1, 4)
294
+					&& 'base' !== substr($htmlArrayCurrent, 1, 4)
295
+					&& '<!' !== substr($htmlArrayCurrent, 0, 2)
296
+					&& '<?xml' !== substr($htmlArrayCurrent, 0, 5)
297
+				) {
298
+					++$tabs;
299
+				}
300
+			}
301
+		}
302
+
303
+		// Remove empty lines
304
+		if ($this->formatType > 1) {
305
+			$this->removeEmptyLines($html);
306
+		}
307
+
308
+		// Restore saved comments, styles and scripts
309
+		for ($i = 0; $i < \count($noFormat); ++$i) {
310
+			$html = str_replace("<!-- ELEMENT {$i} -->", $noFormat[$i], $html);
311
+		}
312
+
313
+		// include debug comment at the end
314
+		if (0 != $tabs && true === $this->debugComment) {
315
+			$html .= "<!-- {$tabs} open elements found -->";
316
+		}
317
+
318
+		return $html;
319
+	}
320
+
321
+	/**
322
+	 * Remove ALL line breaks and multiple white space.
323
+	 */
324
+	protected function killLineBreaks(string $html): string
325
+	{
326
+		$html = str_replace($this->newline, '', $html);
327
+
328
+		return preg_replace('/\s\s+/u', ' ', $html);
329
+		// ? return preg_replace('/\n|\s+(\s)/u', '$1', $html);
330
+	}
331
+
332
+	/**
333
+	 * Remove multiple white space, keeps line breaks.
334
+	 */
335
+	protected function killWhiteSpace(string $html): string
336
+	{
337
+		$temp = explode($this->newline, $html);
338
+		for ($i = 0; $i < \count($temp); ++$i) {
339
+			if (!trim($temp[$i])) {
340
+				unset($temp[$i]);
341
+				continue;
342
+			}
343
+
344
+			$temp[$i] = trim($temp[$i]);
345
+			$temp[$i] = preg_replace('/\s\s+/', ' ', $temp[$i]);
346
+		}
347
+
348
+		return implode($this->newline, $temp);
349
+	}
350
+
351
+	/**
352
+	 * Remove white space at the end of lines, keeps other white space and line breaks.
353
+	 */
354
+	protected function rTrimLines(string &$html): void
355
+	{
356
+		$html = preg_replace('/\s+$/m', '', $html);
357
+	}
358
+
359
+	/**
360
+	 * Convert newlines according to the current OS.
361
+	 */
362
+	protected function convNlOs(string &$html): void
363
+	{
364
+		$html = preg_replace("(\r\n|\r)", $this->newline, $html);
365
+	}
366
+
367
+	/**
368
+	 * Remove empty lines.
369
+	 */
370
+	protected function removeEmptyLines(string &$html): void
371
+	{
372
+		$temp = explode($this->newline, $html);
373
+		$result = [];
374
+		for ($i = 0; $i < \count($temp); ++$i) {
375
+			if ('' == trim($temp[$i])) {
376
+				continue;
377
+			}
378
+			$result[] = $temp[$i];
379
+		}
380
+		$html = implode($this->newline, $result);
381
+	}
382
+
383
+	/**
384
+	 * Include configured header comment in HTML content block.
385
+	 */
386
+	public function includeHeaderComment(string &$html): void
387
+	{
388
+		$html = preg_replace('/^(-->)$/m', "\n\t" . $this->headerComment . "\n$1", $html);
389
+	}
390 390
 }
Please login to merge, or discard this patch.
Spacing   +16 added lines, -16 removed lines patch added patch discarded remove patch
@@ -101,7 +101,7 @@  discard block
 block discarded – undo
101 101
 
102 102
         foreach ($manipulations as $key => $manipulation) {
103 103
             /** @var ManipulationInterface $manipulation */
104
-            $configuration = isset($config[$key . '.']) && \is_array($config[$key . '.']) ? $config[$key . '.'] : [];
104
+            $configuration = isset($config[$key.'.']) && \is_array($config[$key.'.']) ? $config[$key.'.'] : [];
105 105
             $html = $manipulation->manipulate($html, $configuration);
106 106
         }
107 107
 
@@ -161,7 +161,7 @@  discard block
 block discarded – undo
161 161
         $functionalBoxElements = 'dd|dt|frameset|li|tbody|td|tfoot|th|thead|tr|colgroup';
162 162
         $usableBoxElements = 'applet|button|del|iframe|ins|map|object|script';
163 163
         $imagineBoxElements = 'html|body|head|meta|title|link|script|base|!--';
164
-        $allBoxLikeElements = '(?>' . $trueBoxElements . '|' . $functionalBoxElements . '|' . $usableBoxElements . '|' . $imagineBoxElements . ')';
164
+        $allBoxLikeElements = '(?>'.$trueBoxElements.'|'.$functionalBoxElements.'|'.$usableBoxElements.'|'.$imagineBoxElements.')';
165 165
         $esteticBoxLikeElements = '(?>html|head|body|meta name|title|div|table|h1|h2|h3|h4|h5|h6|p|form|pre|center|!--)';
166 166
         $structureBoxLikeElements = '(?>html|head|body|div|!--)';
167 167
 
@@ -170,7 +170,7 @@  discard block
 block discarded – undo
170 170
             '/(<(?:[^<>]+(?:"[^"]*"|\'[^\']*\')?)+>)/',
171 171
             $html,
172 172
             -1,
173
-            \PREG_SPLIT_DELIM_CAPTURE | \PREG_SPLIT_NO_EMPTY
173
+            \PREG_SPLIT_DELIM_CAPTURE|\PREG_SPLIT_NO_EMPTY
174 174
         );
175 175
 
176 176
         if (false === $htmlArrayTemp) {
@@ -204,52 +204,52 @@  discard block
 block discarded – undo
204 204
             } elseif (2 == $this->formatType && ( // minimalistic line break
205 205
                 // this element has a line break before itself
206 206
                 preg_match(
207
-                    '/<' . $structureBoxLikeElements . '(.*)>/Usi',
207
+                    '/<'.$structureBoxLikeElements.'(.*)>/Usi',
208 208
                     $htmlArrayCurrent
209 209
                 ) || preg_match(
210
-                    '/<' . $structureBoxLikeElements . '(.*) \/>/Usi',
210
+                    '/<'.$structureBoxLikeElements.'(.*) \/>/Usi',
211 211
                     $htmlArrayCurrent
212 212
                 ) // one element before is a element that has a line break after
213 213
                 || preg_match(
214
-                    '/<\/' . $structureBoxLikeElements . '(.*)>/Usi',
214
+                    '/<\/'.$structureBoxLikeElements.'(.*)>/Usi',
215 215
                     $htmlArrayBefore
216 216
                 ) || '<!--' == substr(
217 217
                     $htmlArrayBefore,
218 218
                     0,
219 219
                     4
220
-                ) || preg_match('/<' . $structureBoxLikeElements . '(.*) \/>/Usi', $htmlArrayBefore))
220
+                ) || preg_match('/<'.$structureBoxLikeElements.'(.*) \/>/Usi', $htmlArrayBefore))
221 221
             ) {
222 222
                 $newline = true;
223 223
             } elseif (3 == $this->formatType && ( // aestetic line break
224 224
                 // this element has a line break before itself
225 225
                 preg_match(
226
-                    '/<' . $esteticBoxLikeElements . '(.*)>/Usi',
226
+                    '/<'.$esteticBoxLikeElements.'(.*)>/Usi',
227 227
                     $htmlArrayCurrent
228 228
                 ) || preg_match(
229
-                    '/<' . $esteticBoxLikeElements . '(.*) \/>/Usi',
229
+                    '/<'.$esteticBoxLikeElements.'(.*) \/>/Usi',
230 230
                     $htmlArrayCurrent
231 231
                 ) // one element before is a element that has a line break after
232
-                || preg_match('/<\/' . $esteticBoxLikeElements . '(.*)>/Usi', $htmlArrayBefore) || '<!--' == substr(
232
+                || preg_match('/<\/'.$esteticBoxLikeElements.'(.*)>/Usi', $htmlArrayBefore) || '<!--' == substr(
233 233
                     $htmlArrayBefore,
234 234
                     0,
235 235
                     4
236
-                ) || preg_match('/<' . $esteticBoxLikeElements . '(.*) \/>/Usi', $htmlArrayBefore))
236
+                ) || preg_match('/<'.$esteticBoxLikeElements.'(.*) \/>/Usi', $htmlArrayBefore))
237 237
             ) {
238 238
                 $newline = true;
239 239
             } elseif ($this->formatType >= 4 && ( // logical line break
240 240
                 // this element has a line break before itself
241 241
                 preg_match(
242
-                    '/<' . $allBoxLikeElements . '(.*)>/Usi',
242
+                    '/<'.$allBoxLikeElements.'(.*)>/Usi',
243 243
                     $htmlArrayCurrent
244 244
                 ) || preg_match(
245
-                    '/<' . $allBoxLikeElements . '(.*) \/>/Usi',
245
+                    '/<'.$allBoxLikeElements.'(.*) \/>/Usi',
246 246
                     $htmlArrayCurrent
247 247
                 ) // one element before is a element that has a line break after
248
-                || preg_match('/<\/' . $allBoxLikeElements . '(.*)>/Usi', $htmlArrayBefore) || '<!--' == substr(
248
+                || preg_match('/<\/'.$allBoxLikeElements.'(.*)>/Usi', $htmlArrayBefore) || '<!--' == substr(
249 249
                     $htmlArrayBefore,
250 250
                     0,
251 251
                     4
252
-                ) || preg_match('/<' . $allBoxLikeElements . '(.*) \/>/Usi', $htmlArrayBefore))
252
+                ) || preg_match('/<'.$allBoxLikeElements.'(.*) \/>/Usi', $htmlArrayBefore))
253 253
             ) {
254 254
                 $newline = true;
255 255
             }
@@ -385,6 +385,6 @@  discard block
 block discarded – undo
385 385
      */
386 386
     public function includeHeaderComment(string &$html): void
387 387
     {
388
-        $html = preg_replace('/^(-->)$/m', "\n\t" . $this->headerComment . "\n$1", $html);
388
+        $html = preg_replace('/^(-->)$/m', "\n\t".$this->headerComment."\n$1", $html);
389 389
     }
390 390
 }
Please login to merge, or discard this patch.