|
1
|
|
|
<?php |
|
2
|
|
|
|
|
3
|
|
|
namespace HTML\Sourceopt\Service; |
|
4
|
|
|
|
|
5
|
|
|
use HTML\Sourceopt\Manipulation\ManipulationInterface; |
|
6
|
|
|
use HTML\Sourceopt\Manipulation\RemoveBlurScript; |
|
7
|
|
|
use HTML\Sourceopt\Manipulation\RemoveComments; |
|
8
|
|
|
use HTML\Sourceopt\Manipulation\RemoveGenerator; |
|
9
|
|
|
use TYPO3\CMS\Core\Core\Environment; |
|
10
|
|
|
use TYPO3\CMS\Core\SingletonInterface; |
|
11
|
|
|
use TYPO3\CMS\Core\Utility\GeneralUtility; |
|
12
|
|
|
|
|
13
|
|
|
/**
 * Service: clean and re-format parsed HTML.
 *
 * Based on the extension 'sourceopt'.
 *
 * Settings applied through setVariables() are stored on the instance and are
 * only overwritten when a later configuration provides the same option again.
 */
class CleanHtmlService implements SingletonInterface
{
    /**
     * Enable debug comment in footer
     *
     * @var bool
     */
    protected $debugComment = false;

    /**
     * Format type, see formatHtml() for the meaning of the values
     *
     * @var int
     */
    protected $formatType = 0;

    /**
     * Tab character
     *
     * @var string
     */
    protected $tab = "\t";

    /**
     * Newline character
     *
     * @var string
     */
    protected $newline = "\n";

    /**
     * Configured extra header comment
     *
     * @var string
     */
    protected $headerComment = '';

    /**
     * Replacement for split parts that consist of white space only
     *
     * @var string
     */
    protected $emptySpaceChar = ' ';

    /**
     * Set variables based on given config
     *
     * Reads the keys 'formatHtml', 'formatHtml.' ('tabSize', 'debugComment'),
     * 'headerComment' and 'dropEmptySpaceChar'. Missing keys leave the
     * corresponding property untouched.
     *
     * @param array $config
     *
     * @return void
     */
    public function setVariables(array $config)
    {
        // Set newline based on OS
        $this->newline = Environment::isWindows() ? "\r\n" : "\n";

        if (empty($config)) {
            return;
        }

        // !empty() is "isset and truthy", so absent options raise no notice
        if (!empty($config['formatHtml']) && is_numeric($config['formatHtml'])) {
            $this->formatType = (int)$config['formatHtml'];
        }

        if (!empty($config['formatHtml.']['tabSize']) && is_numeric($config['formatHtml.']['tabSize'])) {
            // explicit cast: configuration values usually arrive as strings
            $this->tab = str_pad('', (int)$config['formatHtml.']['tabSize'], ' ');
        }

        if (isset($config['formatHtml.']['debugComment'])) {
            $this->debugComment = (bool)$config['formatHtml.']['debugComment'];
        }

        if (isset($config['headerComment'])) {
            $this->headerComment = $config['headerComment'];
        }

        if (!empty($config['dropEmptySpaceChar'])) {
            $this->emptySpaceChar = '';
        }
    }

    /**
     * Clean given HTML with formatter
     *
     * With a non-empty $config the HTML is returned unchanged unless
     * $config['enabled'] is truthy. Otherwise the header comment is inserted,
     * the enabled manipulations are applied and finally the markup is
     * formatted when a format type greater than 0 is set.
     *
     * @param string $html
     * @param array $config
     *
     * @return string
     */
    public function clean($html, $config = [])
    {
        if (!empty($config)) {
            if (empty($config['enabled'])) {
                return $html;
            }

            $this->setVariables($config);
        }

        // option name => manipulation class, in the order they are applied
        $available = [
            'removeGenerator' => RemoveGenerator::class,
            'removeComments' => RemoveComments::class,
            'removeBlurScript' => RemoveBlurScript::class,
        ];

        $manipulations = [];
        foreach ($available as $key => $className) {
            if (!empty($config[$key])) {
                $manipulations[$key] = GeneralUtility::makeInstance($className);
            }
        }

        if (!empty($this->headerComment)) {
            $this->includeHeaderComment($html);
        }

        foreach ($manipulations as $key => $manipulation) {
            /** @var ManipulationInterface $manipulation */
            $configuration = isset($config[$key . '.']) && is_array($config[$key . '.']) ? $config[$key . '.'] : [];
            $html = $manipulation->manipulate($html, $configuration);
        }

        if ($this->formatType > 0) {
            $html = $this->formatHtml($html);
        }

        return $html;
    }

    /**
     * Formats the (X)HTML code:
     *  - tabs according to the hierarchy of the tags
     *  - removes empty spaces between tags
     *  - removes linebreaks within tags (spares where necessary: pre, textarea, comments, ..)
     *  format types:
     *  0 => off
     *  1 => no line break at all  (code in one line)
     *  2 => minimalistic line breaks (structure defining box-elements)
     *  3 => aesthetic line breaks (important box-elements)
     *  4 => logic line breaks (all box-elements)
     *  5 => max line breaks (all elements)
     *
     * If one of the regular expressions fails, the HTML is returned exactly as
     * it was passed in.
     *
     * @param string $html
     *
     * @return string
     */
    protected function formatHtml($html)
    {
        // kept so the untouched markup can be handed back if a regular expression fails
        $originalHtml = $html;

        // Save original formatted comments, pre, textarea, styles and java-scripts & replace them with markers.
        // Done in a single pass so that a marker can never be written into another protected block.
        $noFormat = [];
        $html = preg_replace_callback(
            '/(?s)((<!--.*?-->)|(<[ \n\r]*pre[^>]*>.*?<[ \n\r]*\/pre[^>]*>)|(<[ \n\r]*textarea[^>]*>.*?<[ \n\r]*\/textarea[^>]*>)|(<[ \n\r]*style[^>]*>.*?<[ \n\r]*\/style[^>]*>)|(<[ \n\r]*script[^>]*>.*?<[ \n\r]*\/script[^>]*>))/im',
            function ($matches) use (&$noFormat) {
                $index = count($noFormat);
                $noFormat[] = $matches[0]; // do not format these block elements

                return "\n<!-- ELEMENT $index -->";
            },
            $html
        );
        if ($html === null) {
            return $originalHtml;
        }

        // define box elements for formatting
        $trueBoxElements = 'address|blockquote|center|dir|div|dl|fieldset|form|h1|h2|h3|h4|h5|h6|hr|isindex|menu|noframes|noscript|ol|p|pre|table|ul|article|aside|details|figcaption|figure|footer|header|hgroup|menu|nav|section';
        $functionalBoxElements = 'dd|dt|frameset|li|tbody|td|tfoot|th|thead|tr|colgroup';
        $usableBoxElements = 'applet|button|del|iframe|ins|map|object|script';
        $imagineBoxElements = 'html|body|head|meta|title|link|script|base|!--';
        $allBoxLikeElements = '(?>' . $trueBoxElements . '|' . $functionalBoxElements . '|' . $usableBoxElements . '|' . $imagineBoxElements . ')';
        $esteticBoxLikeElements = '(?>html|head|body|meta name|title|div|table|h1|h2|h3|h4|h5|h6|p|form|pre|center|!--)';
        $structureBoxLikeElements = '(?>html|head|body|div|!--)';

        // pick the element set that triggers line breaks for the configured format type;
        // null (format type 1) means: no line breaks except after an XML declaration
        $lineBreakElements = null;
        if ($this->formatType == 2) { // minimalistic line break
            $lineBreakElements = $structureBoxLikeElements;
        } elseif ($this->formatType == 3) { // aesthetic line break
            $lineBreakElements = $esteticBoxLikeElements;
        } elseif ($this->formatType >= 4) { // logical line break
            $lineBreakElements = $allBoxLikeElements;
        }

        // split html into its elements
        $htmlArrayTemp = preg_split(
            '/(<(?:[^<>]+(?:"[^"]*"|\'[^\']*\')?)+>)/',
            $html,
            -1,
            PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY
        );

        if ($htmlArrayTemp === false) {
            // $html already contains markers at this point, so return the original markup
            return $originalHtml;
        }

        // replace white-space-only parts; index 0 is an empty sentinel so that
        // the first real element always has a predecessor
        $htmlArray = [''];
        foreach ($htmlArrayTemp as $part) {
            $htmlArray[] = trim($part) !== '' ? $part : $this->emptySpaceChar;
        }

        // tag prefixes that do not open a new indentation level
        // (plain prefix comparison, e.g. '<frame' also covers '<frameset')
        $noIndentPrefixes = [
            '< ', '<img', '<source', '<br', '<hr', '<input', '<link', '<meta', '<col ',
            '<frame', '<isindex', '<param', '<area', '<base', '<!', '<?xml',
        ];

        // rebuild html
        $html = '';
        $tabs = 0;
        $elementCount = count($htmlArray);
        // start behind the sentinel: it is empty and has no predecessor
        for ($x = 1; $x < $elementCount; $x++) {
            $current = $htmlArray[$x];
            $previous = $htmlArray[$x - 1];

            // check if the element should stand in a new line
            $newline = $this->startsWith($previous, '<?xml')
                || ($lineBreakElements !== null && $this->requiresLineBreak($lineBreakElements, $current, $previous));

            // count down a tab
            if ($this->startsWith($current, '</')) {
                $tabs--;
            }

            // add tabs and line breaks in front of the current tag
            if ($newline) {
                // $tabs can become negative on unbalanced markup
                $html .= $this->newline . str_repeat($this->tab, max(0, $tabs));
            }

            // remove white spaces and line breaks and add current tag to the html-string
            if ($this->startsWith($previous, '<pre')
                || $this->startsWith($previous, '<textarea')
                || $this->startsWith($previous, '<!--')
            ) {
                // remove white space after line ending in PRE / TEXTAREA / comment
                $html .= $this->rTrimLines($current);
            } elseif ($this->startsWith($current, '<![CDATA[') || $this->startsWith($current, '<?xml')) {
                // remove multiple white space in CDATA / XML
                $html .= $this->killWhiteSpace($current);
            } else {
                // remove all line breaks
                $html .= $this->killLineBreaks($current);
            }

            // count up a tab
            if ($this->startsWith($current, '<') && !$this->startsWith($current, '</')) {
                $opensLevel = true;
                foreach ($noIndentPrefixes as $prefix) {
                    if ($this->startsWith($current, $prefix)) {
                        $opensLevel = false;
                        break;
                    }
                }
                if ($opensLevel) {
                    $tabs++;
                }
            }
        }

        // Remove empty lines
        if ($this->formatType > 1) {
            $this->removeEmptyLines($html);
        }

        // Restore saved comments, styles and java-scripts
        foreach ($noFormat as $i => $block) {
            // remove white space after line ending
            $html = str_replace("<!-- ELEMENT $i -->", $this->rTrimLines($block), $html);
        }

        // include debug comment at the end
        if ($tabs != 0 && $this->debugComment === true) {
            $html .= '<!--' . $tabs . " open elements found-->\r\n";
        }

        return $html;
    }

    /**
     * Decide whether the current element has to start on a new line
     *
     * @param string $elements regular expression group of the element names that cause line breaks
     * @param string $current the element that is about to be written
     * @param string $previous the element written before
     *
     * @return bool
     */
    private function requiresLineBreak($elements, $current, $previous)
    {
        // this element has a line break before itself
        // (a self-closing "<tag ... />" is matched by this pattern as well)
        if (preg_match('/<' . $elements . '(.*)>/Usi', $current)) {
            return true;
        }

        // one element before is an element that has a line break after
        return preg_match('/<\/' . $elements . '(.*)>/Usi', $previous)
            || $this->startsWith($previous, '<!--')
            || preg_match('/<' . $elements . '(.*) \/>/Usi', $previous);
    }

    /**
     * Case-sensitive, byte-wise check whether a string begins with the given prefix
     *
     * @param string $haystack
     * @param string $prefix
     *
     * @return bool
     */
    private function startsWith($haystack, $prefix)
    {
        return strncmp($haystack, $prefix, strlen($prefix)) === 0;
    }

    /**
     * Remove ALL line breaks and multiple white space
     *
     * @param string $html
     *
     * @return string
     */
    protected function killLineBreaks($html)
    {
        $html = $this->convNlOs($html);
        $html = str_replace($this->newline, '', $html);

        $collapsed = preg_replace('/\s\s+/u', ' ', $html);
        if ($collapsed === null) {
            // the "u" modifier fails on invalid UTF-8; retry byte-wise instead of losing the content
            $collapsed = preg_replace('/\s\s+/', ' ', $html);
        }

        return $collapsed === null ? $html : $collapsed;
    }

    /**
     * Remove multiple white space, keeps line breaks
     *
     * Every line is trimmed, lines that are empty afterwards are dropped.
     *
     * @param string $html
     *
     * @return string
     */
    protected function killWhiteSpace($html)
    {
        $lines = [];
        foreach (explode($this->newline, $this->convNlOs($html)) as $line) {
            $line = trim($line);
            // strict comparison: a line consisting of "0" is content, not an empty line
            if ($line === '') {
                continue;
            }
            $lines[] = preg_replace('/\s\s+/', ' ', $line);
        }

        return implode($this->newline, $lines);
    }

    /**
     * Remove white space at the end of lines, keeps other white space and line breaks
     *
     * @param string $html
     *
     * @return string
     */
    protected function rTrimLines($html)
    {
        $lines = explode($this->newline, $this->convNlOs($html));

        return implode($this->newline, array_map('rtrim', $lines));
    }

    /**
     * Convert all line breaks (\r\n, \n, \r) to the configured newline
     *
     * @param string $html
     *
     * @return string
     */
    protected function convNlOs($html)
    {
        $converted = preg_replace("(\r\n|\n|\r)", $this->newline, $html);

        // preg_replace() yields null on a PCRE error; keep the input in that case
        return $converted === null ? $html : $converted;
    }

    /**
     * Remove tabs and empty spaces before and after lines, transforms linebreaks system conform
     *
     * @param string $html Html-Code
     *
     * @return void
     */
    protected function trimLines(&$html)
    {
        $html = str_replace("\t", '', $html);
        // convert newlines according to the current OS
        if (Environment::isWindows()) {
            $html = str_replace("\n", "\r\n", $html);
        } else {
            $html = str_replace("\r\n", "\n", $html);
        }
        $lines = array_map('trim', explode($this->newline, $html));
        $html = implode($this->newline, $lines);
    }

    /**
     * Remove empty lines
     *
     * @param string $html
     *
     * @return void
     */
    protected function removeEmptyLines(&$html)
    {
        $result = [];
        foreach (explode($this->newline, $html) as $line) {
            if (trim($line) !== '') {
                $result[] = $line;
            }
        }
        $html = implode($this->newline, $result);
    }

    /**
     * Remove new lines where unnecessary
     * spares line breaks within: pre, textarea, ...
     *
     * @param string $html
     *
     * @return void
     */
    protected function removeNewLines(&$html)
    {
        $splitArray = [
            'textarea',
            'pre'
        ]; // possibly also: span, script, style
        $pieces = preg_split('#(<(' . implode('|', $splitArray) . ').*>.*</\2>)#Uis', $html, -1, PREG_SPLIT_DELIM_CAPTURE);
        if ($pieces === false) {
            // leave the markup untouched when the split fails
            return;
        }

        // the pieces repeat in groups of three: text, protected block, captured tag name
        $html = '';
        foreach ($pieces as $i => $piece) {
            $position = $i % 3;
            if ($position === 2) {
                // tag name from the inner capture group, already part of the block
                continue;
            }
            $html .= $position === 0 ? $this->killLineBreaks($piece) : $piece;
        }
    }

    /**
     * Remove obsolete link schema
     *
     * @param string $html
     *
     * @return void
     */
    protected function removeLinkSchema(&$html)
    {
        $cleaned = preg_replace("/<link rel=\"?schema.dc\"?.+?>/is", "", $html);
        // preg_replace() yields null on a PCRE error; keep the markup in that case
        if ($cleaned !== null) {
            $html = $cleaned;
        }
    }

    /**
     * Remove empty alt attributes
     *
     * @param string $html
     *
     * @return void
     */
    protected function removeEmptyAltAtr(&$html)
    {
        $html = str_replace("alt=\"\"", "", $html);
    }

    /**
     * Replace the broken href ".html" by an empty href
     *
     * @param string $html
     *
     * @return void
     */
    protected function removeRealUrlBrokenRootLink(&$html)
    {
        $html = str_replace('href=".html"', 'href=""', $html);
    }

    /**
     * Include configured header comment in HTML content block
     *
     * The comment is inserted behind the first "<meta http-equiv" tag; nothing
     * happens when no header comment is configured or no such tag exists.
     *
     * @param string $html
     *
     * @return void
     */
    public function includeHeaderComment(&$html)
    {
        if (empty($this->headerComment)) {
            return;
        }

        $result = preg_replace_callback('/<meta http-equiv(.*)>/Usi', function ($matches) {
            return trim($matches[0] . $this->newline . $this->tab . $this->tab . '<!-- ' . $this->headerComment . '-->');
        }, $html, 1);

        // preg_replace_callback() yields null on a PCRE error; keep the markup in that case
        if ($result !== null) {
            $html = $result;
        }
    }
}
|
529
|
|
|
|
If the size of the collection does not change during the iteration, it is generally a good practice to compute it beforehand, and not on each iteration: