Completed
Push — master ( fccbd3...03a110 )
by Tim
13s queued 11s
created
Classes/Service/CleanHtmlService.php 1 patch
Indentation   +513 added lines, -513 removed lines patch added patch discarded remove patch
@@ -17,517 +17,517 @@
 block discarded – undo
17 17
 class CleanHtmlService implements SingletonInterface
18 18
 {
19 19
 
20
-    /**
21
-     * Enable Debug comment in footer
22
-     *
23
-     * @var boolean
24
-     */
25
-    protected $debugComment = false;
26
-
27
-    /**
28
-     * Format Type
29
-     *
30
-     * @var integer
31
-     */
32
-    protected $formatType = 0;
33
-
34
-    /**
35
-     * Tab character
36
-     *
37
-     * @var string
38
-     */
39
-    protected $tab = "\t";
40
-
41
-    /**
42
-     * Newline character
43
-     *
44
-     * @var string
45
-     */
46
-    protected $newline = "\n";
47
-
48
-    /**
49
-     * Configured extra header comment
50
-     *
51
-     * @var string
52
-     */
53
-    protected $headerComment = '';
54
-
55
-    /**
56
-     * Empty space char
57
-     * @var string
58
-     */
59
-    protected $emptySpaceChar = ' ';
60
-
61
-    /**
62
-     * Set variables based on given config
63
-     *
64
-     * @param array $config
65
-     *
66
-     * @return void
67
-     */
68
-    public function setVariables(array $config)
69
-    {
70
-        // Set newline based on OS
71
-        if (Environment::isWindows()) {
72
-            $this->newline = "\r\n";
73
-        } else {
74
-            $this->newline = "\n";
75
-        }
76
-
77
-        if (!empty($config)) {
78
-            if ($config['formatHtml'] && is_numeric($config['formatHtml'])) {
79
-                $this->formatType = (int)$config['formatHtml'];
80
-            }
81
-
82
-            if ($config['formatHtml.']['tabSize'] && is_numeric($config['formatHtml.']['tabSize'])) {
83
-                $this->tab = str_pad('', $config['formatHtml.']['tabSize'], ' ');
84
-            }
85
-
86
-            if (isset($config['formatHtml.']['debugComment'])) {
87
-                $this->debugComment = (bool)$config['formatHtml.']['debugComment'];
88
-            }
89
-
90
-            if (isset($config['headerComment'])) {
91
-                $this->headerComment = $config['headerComment'];
92
-            }
93
-
94
-            if (isset($config['dropEmptySpaceChar']) && (bool)$config['dropEmptySpaceChar']) {
95
-                $this->emptySpaceChar = '';
96
-            }
97
-        }
98
-    }
99
-
100
-    /**
101
-     * Clean given HTML with formatter
102
-     *
103
-     * @param string $html
104
-     * @param array $config
105
-     *
106
-     * @return string
107
-     */
108
-    public function clean($html, $config = [])
109
-    {
110
-        if (!empty($config)) {
111
-            if ((bool)$config['enabled'] === false) {
112
-                return $html;
113
-            }
114
-
115
-            $this->setVariables($config);
116
-        }
117
-
118
-        $manipulations = [];
119
-
120
-        if (isset($config['removeGenerator']) && (bool)$config['removeGenerator']) {
121
-            $manipulations['removeGenerator'] = GeneralUtility::makeInstance(RemoveGenerator::class);
122
-        }
123
-
124
-        if (isset($config['removeComments']) && (bool)$config['removeComments']) {
125
-            $manipulations['removeComments'] = GeneralUtility::makeInstance(RemoveComments::class);
126
-        }
127
-
128
-        if (isset($config['removeBlurScript']) && (bool)$config['removeBlurScript']) {
129
-            $manipulations['removeBlurScript'] = GeneralUtility::makeInstance(RemoveBlurScript::class);
130
-        }
131
-
132
-        if (!empty($this->headerComment)) {
133
-            $this->includeHeaderComment($html);
134
-        }
135
-
136
-        foreach ($manipulations as $key => $manipulation) {
137
-            /** @var ManipulationInterface $manipulation */
138
-            $configuration = isset($config[$key . '.']) && is_array($config[$key . '.']) ? $config[$key . '.'] : [];
139
-            $html = $manipulation->manipulate($html, $configuration);
140
-        }
141
-
142
-        if ($this->formatType > 0) {
143
-            $html = $this->formatHtml($html);
144
-        }
145
-
146
-        return $html;
147
-    }
148
-
149
-    /**
150
-     * Formats the (X)HTML code:
151
-     *  - taps according to the hirarchy of the tags
152
-     *  - removes empty spaces between tags
153
-     *  - removes linebreaks within tags (spares where necessary: pre, textarea, comments, ..)
154
-     *  choose from five options:
155
-     *    0 => off
156
-     *    1 => no line break at all  (code in one line)
157
-     *    2 => minimalistic line breaks (structure defining box-elements)
158
-     *    3 => aesthetic line breaks (important box-elements)
159
-     *    4 => logic line breaks (all box-elements)
160
-     *    5 => max line breaks (all elements)
161
-     *
162
-     * @param string $html
163
-     *
164
-     * @return string
165
-     */
166
-    protected function formatHtml($html)
167
-    {
168
-        // Save original formated comments, pre, textarea, styles and java-scripts & replace them with markers
169
-        preg_match_all(
170
-            '/(?s)((<!--.*?-->)|(<[ \n\r]*pre[^>]*>.*?<[ \n\r]*\/pre[^>]*>)|(<[ \n\r]*textarea[^>]*>.*?<[ \n\r]*\/textarea[^>]*>)|(<[ \n\r]*style[^>]*>.*?<[ \n\r]*\/style[^>]*>)|(<[ \n\r]*script[^>]*>.*?<[ \n\r]*\/script[^>]*>))/im',
171
-            $html,
172
-            $matches
173
-        );
174
-        $noFormat = $matches[0]; // do not format these block elements
175
-        for ($i = 0; $i < count($noFormat); $i++) {
176
-            $html = str_replace($noFormat[$i], "\n<!-- ELEMENT $i -->", $html);
177
-        }
178
-
179
-        // define box elements for formatting
180
-        $trueBoxElements = 'address|blockquote|center|dir|div|dl|fieldset|form|h1|h2|h3|h4|h5|h6|hr|isindex|menu|noframes|noscript|ol|p|pre|table|ul|article|aside|details|figcaption|figure|footer|header|hgroup|menu|nav|section';
181
-        $functionalBoxElements = 'dd|dt|frameset|li|tbody|td|tfoot|th|thead|tr|colgroup';
182
-        $usableBoxElements = 'applet|button|del|iframe|ins|map|object|script';
183
-        $imagineBoxElements = 'html|body|head|meta|title|link|script|base|!--';
184
-        $allBoxLikeElements = '(?>' . $trueBoxElements . '|' . $functionalBoxElements . '|' . $usableBoxElements . '|' . $imagineBoxElements . ')';
185
-        $esteticBoxLikeElements = '(?>html|head|body|meta name|title|div|table|h1|h2|h3|h4|h5|h6|p|form|pre|center|!--)';
186
-        $structureBoxLikeElements = '(?>html|head|body|div|!--)';
187
-
188
-        // split html into it's elements
189
-        $htmlArrayTemp = preg_split(
190
-            '/(<(?:[^<>]+(?:"[^"]*"|\'[^\']*\')?)+>)/',
191
-            $html,
192
-            -1,
193
-            PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY
194
-        );
195
-
196
-        if ($htmlArrayTemp === false) {
197
-            // Restore saved comments, styles and java-scripts
198
-            for ($i = 0; $i < count($noFormat); $i++) {
199
-                $noFormat[$i] = $this->rTrimLines($noFormat[$i]); // remove white space after line ending
200
-                $html = str_replace("<!-- ELEMENT $i -->", $noFormat[$i], $html);
201
-            }
202
-            return $html;
203
-        }
204
-        // remove empty lines
205
-        $htmlArray = [''];
206
-        $z = 1;
207
-        for ($x = 0; $x < count($htmlArrayTemp); $x++) {
208
-            $t = trim($htmlArrayTemp[$x]);
209
-            if ($t !== '') {
210
-                $htmlArray[$z] = $htmlArrayTemp[$x];
211
-                $z++;
212
-            } else {
213
-                $htmlArray[$z] = $this->emptySpaceChar;
214
-                $z++;
215
-            }
216
-        }
217
-
218
-        // rebuild html
219
-        $html = '';
220
-        $tabs = 0;
221
-        for ($x = 0; $x < count($htmlArray); $x++) {
222
-            // check if the element should stand in a new line
223
-            $newline = false;
224
-            if (substr($htmlArray[$x - 1], 0, 5) == '<?xml') {
225
-                $newline = true;
226
-            } elseif ($this->formatType == 2 && ( // minimalistic line break
227
-                    # this element has a line break before itself
228
-                    preg_match(
229
-                        '/<' . $structureBoxLikeElements . '(.*)>/Usi',
230
-                        $htmlArray[$x]
231
-                    ) || preg_match(
232
-                        '/<' . $structureBoxLikeElements . '(.*) \/>/Usi',
233
-                        $htmlArray[$x]
234
-                    ) || # one element before is a element that has a line break after
235
-                    preg_match(
236
-                        '/<\/' . $structureBoxLikeElements . '(.*)>/Usi',
237
-                        $htmlArray[$x - 1]
238
-                    ) || substr(
239
-                        $htmlArray[$x - 1],
240
-                        0,
241
-                        4
242
-                    ) == '<!--' || preg_match('/<' . $structureBoxLikeElements . '(.*) \/>/Usi', $htmlArray[$x - 1]))
243
-            ) {
244
-                $newline = true;
245
-            } elseif ($this->formatType == 3 && ( // aestetic line break
246
-                    # this element has a line break before itself
247
-                    preg_match(
248
-                        '/<' . $esteticBoxLikeElements . '(.*)>/Usi',
249
-                        $htmlArray[$x]
250
-                    ) || preg_match(
251
-                        '/<' . $esteticBoxLikeElements . '(.*) \/>/Usi',
252
-                        $htmlArray[$x]
253
-                    ) || # one element before is a element that has a line break after
254
-                    preg_match('/<\/' . $esteticBoxLikeElements . '(.*)>/Usi', $htmlArray[$x - 1]) || substr(
255
-                        $htmlArray[$x - 1],
256
-                        0,
257
-                        4
258
-                    ) == '<!--' || preg_match('/<' . $esteticBoxLikeElements . '(.*) \/>/Usi', $htmlArray[$x - 1]))
259
-            ) {
260
-                $newline = true;
261
-            } elseif ($this->formatType >= 4 && ( // logical line break
262
-                    # this element has a line break before itself
263
-                    preg_match(
264
-                        '/<' . $allBoxLikeElements . '(.*)>/Usi',
265
-                        $htmlArray[$x]
266
-                    ) || preg_match(
267
-                        '/<' . $allBoxLikeElements . '(.*) \/>/Usi',
268
-                        $htmlArray[$x]
269
-                    ) || # one element before is a element that has a line break after
270
-                    preg_match('/<\/' . $allBoxLikeElements . '(.*)>/Usi', $htmlArray[$x - 1]) || substr(
271
-                        $htmlArray[$x - 1],
272
-                        0,
273
-                        4
274
-                    ) == '<!--' || preg_match('/<' . $allBoxLikeElements . '(.*) \/>/Usi', $htmlArray[$x - 1]))
275
-            ) {
276
-                $newline = true;
277
-            }
278
-
279
-            // count down a tab
280
-            if (substr($htmlArray[$x], 0, 2) == '</') {
281
-                $tabs--;
282
-            }
283
-
284
-            // add tabs and line breaks in front of the current tag
285
-            if ($newline) {
286
-                $html .= $this->newline;
287
-                for ($y = 0; $y < $tabs; $y++) {
288
-                    $html .= $this->tab;
289
-                }
290
-            }
291
-
292
-            // remove white spaces and line breaks and add current tag to the html-string
293
-            if (substr($htmlArray[$x - 1], 0, 4) == '<pre' // remove white space after line ending in PRE / TEXTAREA / comment
294
-                || substr($htmlArray[$x - 1], 0, 9) == '<textarea' || substr($htmlArray[$x - 1], 0, 4) == '<!--'
295
-            ) {
296
-                $html .= $this->rTrimLines($htmlArray[$x]);
297
-            } elseif (substr($htmlArray[$x], 0, 9) == '<![CDATA[' // remove multiple white space in CDATA / XML
298
-                || substr($htmlArray[$x], 0, 5) == '<?xml'
299
-            ) {
300
-                $html .= $this->killWhiteSpace($htmlArray[$x]);
301
-            } else { // remove all line breaks
302
-                $html .= $this->killLineBreaks($htmlArray[$x]);
303
-            }
304
-
305
-            // count up a tab
306
-            if (substr($htmlArray[$x], 0, 1) == '<' && substr($htmlArray[$x], 1, 1) != '/') {
307
-                if (
308
-                    substr($htmlArray[$x], 1, 1) !== ' '
309
-                    && substr($htmlArray[$x], 1, 3) !== 'img'
310
-                    && substr($htmlArray[$x], 1, 6) !== 'source'
311
-                    && substr($htmlArray[$x], 1, 2) !== 'br'
312
-                    && substr($htmlArray[$x], 1, 2) !== 'hr'
313
-                    && substr($htmlArray[$x], 1, 5) !== 'input'
314
-                    && substr($htmlArray[$x], 1, 4) !== 'link'
315
-                    && substr($htmlArray[$x], 1, 4) !== 'meta'
316
-                    && substr($htmlArray[$x], 1, 4) !== 'col '
317
-                    && substr($htmlArray[$x], 1, 5) !== 'frame'
318
-                    && substr($htmlArray[$x], 1, 7) !== 'isindex'
319
-                    && substr($htmlArray[$x], 1, 5) !== 'param'
320
-                    && substr($htmlArray[$x], 1, 4) !== 'area'
321
-                    && substr($htmlArray[$x], 1, 4) !== 'base'
322
-                    && substr($htmlArray[$x], 0, 2) !== '<!'
323
-                    && substr($htmlArray[$x], 0, 5) !== '<?xml'
324
-                ) {
325
-                    $tabs++;
326
-                }
327
-            }
328
-        }
329
-
330
-        // Remove empty lines
331
-        if ($this->formatType > 1) {
332
-            $this->removeEmptyLines($html);
333
-        }
334
-
335
-        // Restore saved comments, styles and java-scripts
336
-        for ($i = 0; $i < count($noFormat); $i++) {
337
-            $noFormat[$i] = $this->rTrimLines($noFormat[$i]); // remove white space after line ending
338
-            $html = str_replace("<!-- ELEMENT $i -->", $noFormat[$i], $html);
339
-        }
340
-
341
-        // include debug comment at the end
342
-        if ($tabs != 0 && $this->debugComment === true) {
343
-            $html .= '<!--' . $tabs . " open elements found-->\r\n";
344
-        }
345
-
346
-        return $html;
347
-    }
348
-
349
-    /**
350
-     * Remove ALL line breaks and multiple white space
351
-     *
352
-     * @param string $html
353
-     *
354
-     * @return string
355
-     */
356
-    protected function killLineBreaks($html)
357
-    {
358
-        $html = $this->convNlOs($html);
359
-        $html = str_replace($this->newline, "", $html);
360
-        $html = preg_replace('/\s\s+/u', ' ', $html);
361
-        return $html;
362
-    }
363
-
364
-    /**
365
-     * Remove multiple white space, keeps line breaks
366
-     *
367
-     * @param string $html
368
-     *
369
-     * @return string
370
-     */
371
-    protected function killWhiteSpace($html)
372
-    {
373
-        $html = $this->convNlOs($html);
374
-        $temp = explode($this->newline, $html);
375
-        for ($i = 0; $i < count($temp); $i++) {
376
-            if (!trim($temp[$i])) {
377
-                unset($temp[$i]);
378
-            } else {
379
-                $temp[$i] = trim($temp[$i]);
380
-                $temp[$i] = preg_replace('/\s\s+/', ' ', $temp[$i]);
381
-            }
382
-        }
383
-        $html = implode($this->newline, $temp);
384
-        return $html;
385
-    }
386
-
387
-    /**
388
-     * Remove white space at the end of lines, keeps other white space and line breaks
389
-     *
390
-     * @param string $html
391
-     *
392
-     * @return string
393
-     */
394
-    protected function rTrimLines($html)
395
-    {
396
-        $html = $this->convNlOs($html);
397
-        $temp = explode($this->newline, $html);
398
-        for ($i = 0; $i < count($temp); $i++) {
399
-            $temp[$i] = rtrim($temp[$i]);
400
-        }
401
-        $html = implode($this->newline, $temp);
402
-        return $html;
403
-    }
404
-
405
-    /**
406
-     * Convert newlines according to the current OS
407
-     *
408
-     * @param string $html
409
-     *
410
-     * @return string
411
-     */
412
-    protected function convNlOs($html)
413
-    {
414
-        $html = preg_replace("(\r\n|\n|\r)", $this->newline, $html);
415
-        return $html;
416
-    }
417
-
418
-    /**
419
-     * Remove tabs and empty spaces before and after lines, transforms linebreaks system conform
420
-     *
421
-     * @param string $html Html-Code
422
-     *
423
-     * @return void
424
-     */
425
-    protected function trimLines(&$html)
426
-    {
427
-        $html = str_replace("\t", "", $html);
428
-        // convert newlines according to the current OS
429
-        if (Environment::isWindows()) {
430
-            $html = str_replace("\n", "\r\n", $html);
431
-        } else {
432
-            $html = str_replace("\r\n", "\n", $html);
433
-        }
434
-        $temp = explode($this->newline, $html);
435
-        $temp = array_map('trim', $temp);
436
-        $html = implode($this->newline, $temp);
437
-        unset($temp);
438
-    }
439
-
440
-    /**
441
-     * Remove empty lines
442
-     *
443
-     * @param string $html
444
-     *
445
-     * @return void
446
-     */
447
-    protected function removeEmptyLines(&$html)
448
-    {
449
-        $temp = explode($this->newline, $html);
450
-        $result = [];
451
-        for ($i = 0; $i < count($temp); ++$i) {
452
-            if ("" == trim($temp[$i])) {
453
-                continue;
454
-            }
455
-            $result[] = $temp[$i];
456
-        }
457
-        $html = implode($this->newline, $result);
458
-    }
459
-
460
-    /**
461
-     * Remove new lines where unnecessary
462
-     * spares line breaks within: pre, textarea, ...
463
-     *
464
-     * @param string $html
465
-     *
466
-     * @return void
467
-     */
468
-    protected function removeNewLines(&$html)
469
-    {
470
-        $splitArray = [
471
-            'textarea',
472
-            'pre'
473
-        ]; // eventuell auch: span, script, style
474
-        $peaces = preg_split('#(<(' . implode('|', $splitArray) . ').*>.*</\2>)#Uis', $html, -1, PREG_SPLIT_DELIM_CAPTURE);
475
-        $html = "";
476
-        for ($i = 0; $i < count($peaces); $i++) {
477
-            if (($i + 1) % 3 == 0) {
478
-                continue;
479
-            }
480
-            $html .= (($i - 1) % 3 != 0) ? $this->killLineBreaks($peaces[$i]) : $peaces[$i];
481
-        }
482
-    }
483
-
484
-    /**
485
-     * Remove obsolete link schema
486
-     *
487
-     * @param string $html
488
-     *
489
-     * @return void
490
-     */
491
-    protected function removeLinkSchema(&$html)
492
-    {
493
-        $html = preg_replace("/<link rel=\"?schema.dc\"?.+?>/is", "", $html);
494
-    }
495
-
496
-    /**
497
-     * Remove empty alt tags
498
-     *
499
-     * @param string $html
500
-     *
501
-     * @return void
502
-     */
503
-    protected function removeEmptyAltAtr(&$html)
504
-    {
505
-        $html = str_replace("alt=\"\"", "", $html);
506
-    }
507
-
508
-    /**
509
-     * Remove broken links in <a> tags
510
-     *
511
-     * @param string $html
512
-     *
513
-     * @return void
514
-     */
515
-    protected function removeRealUrlBrokenRootLink(&$html)
516
-    {
517
-        $html = str_replace('href=".html"', 'href=""', $html);
518
-    }
519
-
520
-    /**
521
-     * Include configured header comment in HTML content block
522
-     *
523
-     * @param $html
524
-     */
525
-    public function includeHeaderComment(&$html)
526
-    {
527
-        if (!empty($this->headerComment)) {
528
-            $html = preg_replace_callback('/<meta http-equiv(.*)>/Usi', function ($matches) {
529
-                return trim($matches[0] . $this->newline . $this->tab . $this->tab . '<!-- ' . $this->headerComment . '-->');
530
-            }, $html, 1);
531
-        }
532
-    }
20
+	/**
21
+	 * Enable Debug comment in footer
22
+	 *
23
+	 * @var boolean
24
+	 */
25
+	protected $debugComment = false;
26
+
27
+	/**
28
+	 * Format Type
29
+	 *
30
+	 * @var integer
31
+	 */
32
+	protected $formatType = 0;
33
+
34
+	/**
35
+	 * Tab character
36
+	 *
37
+	 * @var string
38
+	 */
39
+	protected $tab = "\t";
40
+
41
+	/**
42
+	 * Newline character
43
+	 *
44
+	 * @var string
45
+	 */
46
+	protected $newline = "\n";
47
+
48
+	/**
49
+	 * Configured extra header comment
50
+	 *
51
+	 * @var string
52
+	 */
53
+	protected $headerComment = '';
54
+
55
+	/**
56
+	 * Empty space char
57
+	 * @var string
58
+	 */
59
+	protected $emptySpaceChar = ' ';
60
+
61
+	/**
62
+	 * Set variables based on given config
63
+	 *
64
+	 * @param array $config
65
+	 *
66
+	 * @return void
67
+	 */
68
+	public function setVariables(array $config)
69
+	{
70
+		// Set newline based on OS
71
+		if (Environment::isWindows()) {
72
+			$this->newline = "\r\n";
73
+		} else {
74
+			$this->newline = "\n";
75
+		}
76
+
77
+		if (!empty($config)) {
78
+			if ($config['formatHtml'] && is_numeric($config['formatHtml'])) {
79
+				$this->formatType = (int)$config['formatHtml'];
80
+			}
81
+
82
+			if ($config['formatHtml.']['tabSize'] && is_numeric($config['formatHtml.']['tabSize'])) {
83
+				$this->tab = str_pad('', $config['formatHtml.']['tabSize'], ' ');
84
+			}
85
+
86
+			if (isset($config['formatHtml.']['debugComment'])) {
87
+				$this->debugComment = (bool)$config['formatHtml.']['debugComment'];
88
+			}
89
+
90
+			if (isset($config['headerComment'])) {
91
+				$this->headerComment = $config['headerComment'];
92
+			}
93
+
94
+			if (isset($config['dropEmptySpaceChar']) && (bool)$config['dropEmptySpaceChar']) {
95
+				$this->emptySpaceChar = '';
96
+			}
97
+		}
98
+	}
99
+
100
+	/**
101
+	 * Clean given HTML with formatter
102
+	 *
103
+	 * @param string $html
104
+	 * @param array $config
105
+	 *
106
+	 * @return string
107
+	 */
108
+	public function clean($html, $config = [])
109
+	{
110
+		if (!empty($config)) {
111
+			if ((bool)$config['enabled'] === false) {
112
+				return $html;
113
+			}
114
+
115
+			$this->setVariables($config);
116
+		}
117
+
118
+		$manipulations = [];
119
+
120
+		if (isset($config['removeGenerator']) && (bool)$config['removeGenerator']) {
121
+			$manipulations['removeGenerator'] = GeneralUtility::makeInstance(RemoveGenerator::class);
122
+		}
123
+
124
+		if (isset($config['removeComments']) && (bool)$config['removeComments']) {
125
+			$manipulations['removeComments'] = GeneralUtility::makeInstance(RemoveComments::class);
126
+		}
127
+
128
+		if (isset($config['removeBlurScript']) && (bool)$config['removeBlurScript']) {
129
+			$manipulations['removeBlurScript'] = GeneralUtility::makeInstance(RemoveBlurScript::class);
130
+		}
131
+
132
+		if (!empty($this->headerComment)) {
133
+			$this->includeHeaderComment($html);
134
+		}
135
+
136
+		foreach ($manipulations as $key => $manipulation) {
137
+			/** @var ManipulationInterface $manipulation */
138
+			$configuration = isset($config[$key . '.']) && is_array($config[$key . '.']) ? $config[$key . '.'] : [];
139
+			$html = $manipulation->manipulate($html, $configuration);
140
+		}
141
+
142
+		if ($this->formatType > 0) {
143
+			$html = $this->formatHtml($html);
144
+		}
145
+
146
+		return $html;
147
+	}
148
+
149
+	/**
150
+	 * Formats the (X)HTML code:
151
+	 *  - taps according to the hirarchy of the tags
152
+	 *  - removes empty spaces between tags
153
+	 *  - removes linebreaks within tags (spares where necessary: pre, textarea, comments, ..)
154
+	 *  choose from five options:
155
+	 *    0 => off
156
+	 *    1 => no line break at all  (code in one line)
157
+	 *    2 => minimalistic line breaks (structure defining box-elements)
158
+	 *    3 => aesthetic line breaks (important box-elements)
159
+	 *    4 => logic line breaks (all box-elements)
160
+	 *    5 => max line breaks (all elements)
161
+	 *
162
+	 * @param string $html
163
+	 *
164
+	 * @return string
165
+	 */
166
+	protected function formatHtml($html)
167
+	{
168
+		// Save original formated comments, pre, textarea, styles and java-scripts & replace them with markers
169
+		preg_match_all(
170
+			'/(?s)((<!--.*?-->)|(<[ \n\r]*pre[^>]*>.*?<[ \n\r]*\/pre[^>]*>)|(<[ \n\r]*textarea[^>]*>.*?<[ \n\r]*\/textarea[^>]*>)|(<[ \n\r]*style[^>]*>.*?<[ \n\r]*\/style[^>]*>)|(<[ \n\r]*script[^>]*>.*?<[ \n\r]*\/script[^>]*>))/im',
171
+			$html,
172
+			$matches
173
+		);
174
+		$noFormat = $matches[0]; // do not format these block elements
175
+		for ($i = 0; $i < count($noFormat); $i++) {
176
+			$html = str_replace($noFormat[$i], "\n<!-- ELEMENT $i -->", $html);
177
+		}
178
+
179
+		// define box elements for formatting
180
+		$trueBoxElements = 'address|blockquote|center|dir|div|dl|fieldset|form|h1|h2|h3|h4|h5|h6|hr|isindex|menu|noframes|noscript|ol|p|pre|table|ul|article|aside|details|figcaption|figure|footer|header|hgroup|menu|nav|section';
181
+		$functionalBoxElements = 'dd|dt|frameset|li|tbody|td|tfoot|th|thead|tr|colgroup';
182
+		$usableBoxElements = 'applet|button|del|iframe|ins|map|object|script';
183
+		$imagineBoxElements = 'html|body|head|meta|title|link|script|base|!--';
184
+		$allBoxLikeElements = '(?>' . $trueBoxElements . '|' . $functionalBoxElements . '|' . $usableBoxElements . '|' . $imagineBoxElements . ')';
185
+		$esteticBoxLikeElements = '(?>html|head|body|meta name|title|div|table|h1|h2|h3|h4|h5|h6|p|form|pre|center|!--)';
186
+		$structureBoxLikeElements = '(?>html|head|body|div|!--)';
187
+
188
+		// split html into it's elements
189
+		$htmlArrayTemp = preg_split(
190
+			'/(<(?:[^<>]+(?:"[^"]*"|\'[^\']*\')?)+>)/',
191
+			$html,
192
+			-1,
193
+			PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY
194
+		);
195
+
196
+		if ($htmlArrayTemp === false) {
197
+			// Restore saved comments, styles and java-scripts
198
+			for ($i = 0; $i < count($noFormat); $i++) {
199
+				$noFormat[$i] = $this->rTrimLines($noFormat[$i]); // remove white space after line ending
200
+				$html = str_replace("<!-- ELEMENT $i -->", $noFormat[$i], $html);
201
+			}
202
+			return $html;
203
+		}
204
+		// remove empty lines
205
+		$htmlArray = [''];
206
+		$z = 1;
207
+		for ($x = 0; $x < count($htmlArrayTemp); $x++) {
208
+			$t = trim($htmlArrayTemp[$x]);
209
+			if ($t !== '') {
210
+				$htmlArray[$z] = $htmlArrayTemp[$x];
211
+				$z++;
212
+			} else {
213
+				$htmlArray[$z] = $this->emptySpaceChar;
214
+				$z++;
215
+			}
216
+		}
217
+
218
+		// rebuild html
219
+		$html = '';
220
+		$tabs = 0;
221
+		for ($x = 0; $x < count($htmlArray); $x++) {
222
+			// check if the element should stand in a new line
223
+			$newline = false;
224
+			if (substr($htmlArray[$x - 1], 0, 5) == '<?xml') {
225
+				$newline = true;
226
+			} elseif ($this->formatType == 2 && ( // minimalistic line break
227
+					# this element has a line break before itself
228
+					preg_match(
229
+						'/<' . $structureBoxLikeElements . '(.*)>/Usi',
230
+						$htmlArray[$x]
231
+					) || preg_match(
232
+						'/<' . $structureBoxLikeElements . '(.*) \/>/Usi',
233
+						$htmlArray[$x]
234
+					) || # one element before is a element that has a line break after
235
+					preg_match(
236
+						'/<\/' . $structureBoxLikeElements . '(.*)>/Usi',
237
+						$htmlArray[$x - 1]
238
+					) || substr(
239
+						$htmlArray[$x - 1],
240
+						0,
241
+						4
242
+					) == '<!--' || preg_match('/<' . $structureBoxLikeElements . '(.*) \/>/Usi', $htmlArray[$x - 1]))
243
+			) {
244
+				$newline = true;
245
+			} elseif ($this->formatType == 3 && ( // aestetic line break
246
+					# this element has a line break before itself
247
+					preg_match(
248
+						'/<' . $esteticBoxLikeElements . '(.*)>/Usi',
249
+						$htmlArray[$x]
250
+					) || preg_match(
251
+						'/<' . $esteticBoxLikeElements . '(.*) \/>/Usi',
252
+						$htmlArray[$x]
253
+					) || # one element before is a element that has a line break after
254
+					preg_match('/<\/' . $esteticBoxLikeElements . '(.*)>/Usi', $htmlArray[$x - 1]) || substr(
255
+						$htmlArray[$x - 1],
256
+						0,
257
+						4
258
+					) == '<!--' || preg_match('/<' . $esteticBoxLikeElements . '(.*) \/>/Usi', $htmlArray[$x - 1]))
259
+			) {
260
+				$newline = true;
261
+			} elseif ($this->formatType >= 4 && ( // logical line break
262
+					# this element has a line break before itself
263
+					preg_match(
264
+						'/<' . $allBoxLikeElements . '(.*)>/Usi',
265
+						$htmlArray[$x]
266
+					) || preg_match(
267
+						'/<' . $allBoxLikeElements . '(.*) \/>/Usi',
268
+						$htmlArray[$x]
269
+					) || # one element before is a element that has a line break after
270
+					preg_match('/<\/' . $allBoxLikeElements . '(.*)>/Usi', $htmlArray[$x - 1]) || substr(
271
+						$htmlArray[$x - 1],
272
+						0,
273
+						4
274
+					) == '<!--' || preg_match('/<' . $allBoxLikeElements . '(.*) \/>/Usi', $htmlArray[$x - 1]))
275
+			) {
276
+				$newline = true;
277
+			}
278
+
279
+			// count down a tab
280
+			if (substr($htmlArray[$x], 0, 2) == '</') {
281
+				$tabs--;
282
+			}
283
+
284
+			// add tabs and line breaks in front of the current tag
285
+			if ($newline) {
286
+				$html .= $this->newline;
287
+				for ($y = 0; $y < $tabs; $y++) {
288
+					$html .= $this->tab;
289
+				}
290
+			}
291
+
292
+			// remove white spaces and line breaks and add current tag to the html-string
293
+			if (substr($htmlArray[$x - 1], 0, 4) == '<pre' // remove white space after line ending in PRE / TEXTAREA / comment
294
+				|| substr($htmlArray[$x - 1], 0, 9) == '<textarea' || substr($htmlArray[$x - 1], 0, 4) == '<!--'
295
+			) {
296
+				$html .= $this->rTrimLines($htmlArray[$x]);
297
+			} elseif (substr($htmlArray[$x], 0, 9) == '<![CDATA[' // remove multiple white space in CDATA / XML
298
+				|| substr($htmlArray[$x], 0, 5) == '<?xml'
299
+			) {
300
+				$html .= $this->killWhiteSpace($htmlArray[$x]);
301
+			} else { // remove all line breaks
302
+				$html .= $this->killLineBreaks($htmlArray[$x]);
303
+			}
304
+
305
+			// count up a tab
306
+			if (substr($htmlArray[$x], 0, 1) == '<' && substr($htmlArray[$x], 1, 1) != '/') {
307
+				if (
308
+					substr($htmlArray[$x], 1, 1) !== ' '
309
+					&& substr($htmlArray[$x], 1, 3) !== 'img'
310
+					&& substr($htmlArray[$x], 1, 6) !== 'source'
311
+					&& substr($htmlArray[$x], 1, 2) !== 'br'
312
+					&& substr($htmlArray[$x], 1, 2) !== 'hr'
313
+					&& substr($htmlArray[$x], 1, 5) !== 'input'
314
+					&& substr($htmlArray[$x], 1, 4) !== 'link'
315
+					&& substr($htmlArray[$x], 1, 4) !== 'meta'
316
+					&& substr($htmlArray[$x], 1, 4) !== 'col '
317
+					&& substr($htmlArray[$x], 1, 5) !== 'frame'
318
+					&& substr($htmlArray[$x], 1, 7) !== 'isindex'
319
+					&& substr($htmlArray[$x], 1, 5) !== 'param'
320
+					&& substr($htmlArray[$x], 1, 4) !== 'area'
321
+					&& substr($htmlArray[$x], 1, 4) !== 'base'
322
+					&& substr($htmlArray[$x], 0, 2) !== '<!'
323
+					&& substr($htmlArray[$x], 0, 5) !== '<?xml'
324
+				) {
325
+					$tabs++;
326
+				}
327
+			}
328
+		}
329
+
330
+		// Remove empty lines
331
+		if ($this->formatType > 1) {
332
+			$this->removeEmptyLines($html);
333
+		}
334
+
335
+		// Restore saved comments, styles and java-scripts
336
+		for ($i = 0; $i < count($noFormat); $i++) {
337
+			$noFormat[$i] = $this->rTrimLines($noFormat[$i]); // remove white space after line ending
338
+			$html = str_replace("<!-- ELEMENT $i -->", $noFormat[$i], $html);
339
+		}
340
+
341
+		// include debug comment at the end
342
+		if ($tabs != 0 && $this->debugComment === true) {
343
+			$html .= '<!--' . $tabs . " open elements found-->\r\n";
344
+		}
345
+
346
+		return $html;
347
+	}
348
+
349
/**
 * Remove ALL line breaks and collapse every run of white space into one space.
 *
 * Line breaks are first normalised to the configured newline sequence and
 * then stripped; afterwards two or more consecutive white-space characters
 * are replaced by a single space.
 *
 * @param string $html
 *
 * @return string
 */
protected function killLineBreaks($html)
{
	$html = $this->convNlOs($html);
	$html = str_replace($this->newline, "", $html);

	// With the "u" modifier preg_replace() returns null when the subject is
	// not valid UTF-8. Returning that null would silently drop the whole
	// fragment, so fall back to the string without collapsed white space.
	$collapsed = preg_replace('/\s\s+/u', ' ', $html);

	return $collapsed !== null ? $collapsed : $html;
}
363
+
364
/**
 * Remove multiple white space, keeps line breaks
 *
 * Every line is trimmed, runs of two or more white-space characters inside
 * a line are collapsed into one space, and lines that are empty after
 * trimming are dropped.
 *
 * @param string $html
 *
 * @return string
 */
protected function killWhiteSpace($html)
{
	$html = $this->convNlOs($html);

	$lines = [];
	// Collect into a fresh list instead of unsetting entries while looping
	// over count(): the shrinking count ended the loop early and left the
	// trailing lines unprocessed.
	foreach (explode($this->newline, $html) as $line) {
		$line = trim($line);
		// Strict comparison so that a line consisting only of "0" is kept.
		if ($line === '') {
			continue;
		}
		$lines[] = preg_replace('/\s\s+/', ' ', $line);
	}

	return implode($this->newline, $lines);
}
386
+
387
/**
 * Strip trailing white space from every line.
 *
 * Line breaks are normalised to the configured newline sequence; leading
 * and inner white space as well as the line breaks themselves are kept.
 *
 * @param string $html
 *
 * @return string
 */
protected function rTrimLines($html)
{
	$lines = explode($this->newline, $this->convNlOs($html));

	return implode($this->newline, array_map('rtrim', $lines));
}
404
+
405
/**
 * Normalise line breaks to the configured newline sequence.
 *
 * Every CRLF, LF or CR found in the input is replaced by $this->newline.
 *
 * @param string $html
 *
 * @return string
 */
protected function convNlOs($html)
{
	return preg_replace("(\r\n|\n|\r)", $this->newline, $html);
}
417
+
418
/**
 * Remove tabs and empty spaces before and after lines, transforms linebreaks system conform
 *
 * All tab characters are removed, every line break style (CRLF, LF, CR) is
 * converted to the configured newline sequence and each resulting line is
 * trimmed on both sides. The result is written back to the referenced
 * variable.
 *
 * @param string $html Html-Code
 *
 * @return void
 */
protected function trimLines(&$html)
{
	$html = str_replace("\t", "", $html);

	// Use the shared helper rather than a hand-rolled str_replace(): it also
	// converts lone carriage returns and never turns an existing "\r\n"
	// into "\r\r\n".
	$html = $this->convNlOs($html);

	$lines = array_map('trim', explode($this->newline, $html));
	$html = implode($this->newline, $lines);
}
439
+
440
/**
 * Drop every line that is empty or contains only white space.
 *
 * The input is split at the configured newline sequence, blank lines are
 * filtered out and the remaining lines are joined again. The result is
 * written back to the referenced variable.
 *
 * @param string $html
 *
 * @return void
 */
protected function removeEmptyLines(&$html)
{
	$nonBlankLines = array_filter(
		explode($this->newline, $html),
		static function ($line) {
			return trim($line) !== "";
		}
	);

	// implode() ignores the gaps array_filter() leaves in the keys.
	$html = implode($this->newline, $nonBlankLines);
}
459
+
460
/**
 * Remove new lines where unnecessary
 * spares line breaks within: pre, textarea, ...
 *
 * The markup is split around complete <textarea> and <pre> elements. Text
 * outside of these elements is passed through killLineBreaks(); the
 * elements themselves are copied verbatim. The result is written back to
 * the referenced variable.
 *
 * @param string $html
 *
 * @return void
 */
protected function removeNewLines(&$html)
{
	$splitArray = [
		'textarea',
		'pre'
	]; // possibly also: span, script, style

	$pieces = preg_split(
		'#(<(' . implode('|', $splitArray) . ').*>.*</\2>)#Uis',
		$html,
		-1,
		PREG_SPLIT_DELIM_CAPTURE
	);

	// preg_split() returns false on a PCRE failure. Keep the document
	// untouched in that case instead of replacing it with an empty string.
	if ($pieces === false) {
		return;
	}

	// With PREG_SPLIT_DELIM_CAPTURE the result repeats in groups of three:
	//   0 => text outside the protected elements
	//   1 => a complete protected element (outer capture group)
	//   2 => its tag name (inner capture group used by the back reference)
	$html = "";
	foreach ($pieces as $index => $piece) {
		switch ($index % 3) {
			case 0:
				$html .= $this->killLineBreaks($piece);
				break;
			case 1:
				$html .= $piece;
				break;
			default:
				// The tag name is already part of the preceding piece.
				break;
		}
	}
}
483
+
484
/**
 * Remove obsolete link schema
 *
 * Deletes every <link> tag whose first attribute is rel="schema.dc"
 * (quotes optional, case-insensitive). The result is written back to the
 * referenced variable.
 *
 * @param string $html
 *
 * @return void
 */
protected function removeLinkSchema(&$html)
{
	// The dot is escaped so that only a literal "schema.dc" matches.
	$cleaned = preg_replace('/<link rel="?schema\.dc"?.+?>/is', '', $html);

	// preg_replace() returns null on a PCRE failure; keep the document then.
	if ($cleaned !== null) {
		$html = $cleaned;
	}
}
495
+
496
/**
 * Remove empty alt tags
 *
 * Deletes every occurrence of alt="" from the markup. Surrounding white
 * space is left as it is. The result is written back to the referenced
 * variable.
 *
 * @param string $html
 *
 * @return void
 */
protected function removeEmptyAltAtr(&$html)
{
	// The lookbehind makes sure "alt" is a complete attribute name, so that
	// attributes such as data-alt="" are not cut down to a dangling "data-".
	$cleaned = preg_replace('/(?<![\w:-])alt=""/', '', $html);

	// preg_replace() returns null on a PCRE failure; keep the document then.
	if ($cleaned !== null) {
		$html = $cleaned;
	}
}
507
+
508
/**
 * Repair links whose target is only ".html".
 *
 * Every href=".html" is replaced by an empty href. The result is written
 * back to the referenced variable.
 *
 * @param string $html
 *
 * @return void
 */
protected function removeRealUrlBrokenRootLink(&$html)
{
	$brokenHref = 'href=".html"';
	$emptyHref = 'href=""';

	$html = str_replace($brokenHref, $emptyHref, $html);
}
519
+
520
/**
 * Insert the configured header comment after the first
 * <meta http-equiv ...> tag of the given HTML.
 *
 * Nothing happens when no header comment is configured. The comment is put
 * on a new line, indented by two tab units. The result is written back to
 * the referenced variable.
 *
 * @param string $html
 *
 * @return void
 */
public function includeHeaderComment(&$html)
{
	if (empty($this->headerComment)) {
		return;
	}

	$appendComment = function ($matches) {
		$indent = $this->tab . $this->tab;
		$comment = '<!-- ' . $this->headerComment . '-->';

		return trim($matches[0] . $this->newline . $indent . $comment);
	};

	// The limit of 1 restricts the insertion to the first matching meta tag.
	$html = preg_replace_callback('/<meta http-equiv(.*)>/Usi', $appendComment, $html, 1);
}
533 533
 }
Please login to merge, or discard this patch.