1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace HTML\Sourceopt\Service; |
4
|
|
|
|
5
|
|
|
use HTML\Sourceopt\Manipulation\ManipulationInterface; |
6
|
|
|
use HTML\Sourceopt\Manipulation\RemoveBlurScript; |
7
|
|
|
use HTML\Sourceopt\Manipulation\RemoveComments; |
8
|
|
|
use HTML\Sourceopt\Manipulation\RemoveGenerator; |
9
|
|
|
use TYPO3\CMS\Core\Core\Environment; |
10
|
|
|
use TYPO3\CMS\Core\SingletonInterface; |
11
|
|
|
use TYPO3\CMS\Core\Utility\GeneralUtility; |
12
|
|
|
|
13
|
|
|
/** |
14
|
|
|
* Service: Clean parsed HTML functionality |
15
|
|
|
* Based on the extension 'sourceopt' |
16
|
|
|
*/ |
17
|
|
|
class CleanHtmlService implements SingletonInterface |
18
|
|
|
{ |
19
|
|
|
|
20
|
|
|
/**
 * When true, formatHtml() appends a trailing comment reporting the number
 * of unbalanced (still open) elements, if that number is not zero.
 *
 * @var bool
 */
protected $debugComment = false;

/**
 * Active format type; formatHtml() is only invoked by clean() when this
 * value is greater than 0 (see formatHtml() for the meaning of 1 to 5).
 *
 * @var int
 */
protected $formatType = 0;

/**
 * String emitted once per indentation level. Defaults to a tab and is
 * replaced by a run of spaces when a tab size is configured.
 *
 * @var string
 */
protected $tab = "\t";

/**
 * Line separator used for all generated output; setVariables() resets it
 * to "\r\n" on Windows and "\n" everywhere else.
 *
 * @var string
 */
protected $newline = "\n";

/**
 * Text of the extra comment that includeHeaderComment() inserts after the
 * first "<meta http-equiv" tag. Empty string disables the feature.
 *
 * @var string
 */
protected $headerComment = '';

/**
 * Replacement used by formatHtml() for fragments that consist of white
 * space only. setVariables() empties it when "dropEmptySpaceChar" is set.
 *
 * @var string
 */
protected $emptySpaceChar = ' ';
60
|
|
|
|
61
|
|
|
/**
 * Set variables based on given config
 *
 * The newline sequence is always reset from the host OS first. After that,
 * every option that is present in $config (and passes validation) overrides
 * the current value; missing options leave the current value untouched.
 *
 * Evaluated keys:
 *  - formatHtml               numeric, non-zero: format type
 *  - formatHtml.tabSize       numeric, non-zero: indent with that many spaces
 *  - formatHtml.debugComment  cast to bool
 *  - headerComment            stored as given
 *  - dropEmptySpaceChar       truthy: white-space-only fragments become ''
 *
 * @param array $config
 *
 * @return void
 */
public function setVariables(array $config)
{
    // Set newline based on OS
    $this->newline = Environment::isWindows() ? "\r\n" : "\n";

    if (empty($config)) {
        return;
    }

    // empty() instead of a bare array read: a missing key must not raise
    // an "undefined array key" warning.
    if (!empty($config['formatHtml']) && is_numeric($config['formatHtml'])) {
        $this->formatType = (int)$config['formatHtml'];
    }

    // The sub-configuration is optional and may be absent or not an array.
    $formatOptions = isset($config['formatHtml.']) && is_array($config['formatHtml.'])
        ? $config['formatHtml.']
        : [];

    if (!empty($formatOptions['tabSize']) && is_numeric($formatOptions['tabSize'])) {
        // Explicit int cast: the value usually arrives as a numeric string.
        // Negative sizes yield an empty indent, as str_pad() did before.
        $this->tab = str_repeat(' ', max(0, (int)$formatOptions['tabSize']));
    }

    if (isset($formatOptions['debugComment'])) {
        $this->debugComment = (bool)$formatOptions['debugComment'];
    }

    if (isset($config['headerComment'])) {
        $this->headerComment = $config['headerComment'];
    }

    if (!empty($config['dropEmptySpaceChar'])) {
        $this->emptySpaceChar = '';
    }
}
99
|
|
|
|
100
|
|
|
/**
 * Clean given HTML with formatter
 *
 * With a non-empty $config the call is a no-op unless "enabled" is truthy;
 * otherwise the config is applied through setVariables(). With an empty
 * $config the settings currently stored on this instance are used.
 *
 * Processing order: header comment, then the enabled manipulations
 * (removeGenerator, removeComments, removeBlurScript), then formatHtml()
 * when the format type is greater than 0.
 *
 * @param string $html
 * @param array  $config
 *
 * @return string
 */
public function clean($html, $config = [])
{
    if (!empty($config)) {
        // A missing "enabled" key counts as disabled; empty() avoids the
        // undefined-key warning the direct array read produced.
        if (empty($config['enabled'])) {
            return $html;
        }

        $this->setVariables($config);
    }

    if (!empty($this->headerComment)) {
        $this->includeHeaderComment($html);
    }

    // Config key => manipulation class, in the order they are applied.
    $availableManipulations = [
        'removeGenerator' => RemoveGenerator::class,
        'removeComments' => RemoveComments::class,
        'removeBlurScript' => RemoveBlurScript::class,
    ];

    foreach ($availableManipulations as $key => $className) {
        if (empty($config[$key])) {
            continue;
        }

        /** @var ManipulationInterface $manipulation */
        $manipulation = GeneralUtility::makeInstance($className);
        // Per-manipulation options live under the key with a trailing dot.
        $configuration = isset($config[$key . '.']) && is_array($config[$key . '.']) ? $config[$key . '.'] : [];
        $html = $manipulation->manipulate($html, $configuration);
    }

    if ($this->formatType > 0) {
        $html = $this->formatHtml($html);
    }

    return $html;
}
148
|
|
|
|
149
|
|
|
/**
 * Formats the (X)HTML code:
 *  - tabs according to the hierarchy of the tags
 *  - removes empty spaces between tags
 *  - removes linebreaks within tags (spares where necessary: pre, textarea, comments, ..)
 * Format types (read from $this->formatType):
 *  0 => off
 *  1 => no line break at all  (code in one line)
 *  2 => minimalistic line breaks (structure defining box-elements)
 *  3 => aesthetic line breaks (important box-elements)
 *  4 => logic line breaks (all box-elements)
 *  5 => max line breaks (all elements)
 *
 * Comments, pre, textarea, style and script blocks are swapped for
 * "<!-- ELEMENT n -->" markers before formatting and restored afterwards.
 * If the markup cannot be split into fragments, the input is returned
 * unchanged.
 *
 * @param string $html
 *
 * @return string
 */
protected function formatHtml($html)
{
    // Kept so that a failing split can hand back the real input instead of
    // the marker-substituted intermediate string.
    $originalHtml = $html;

    // Save original formated comments, pre, textarea, styles and java-scripts & replace them with markers
    preg_match_all(
        '/(?s)((<!--.*?-->)|(<[ \n\r]*pre[^>]*>.*?<[ \n\r]*\/pre[^>]*>)|(<[ \n\r]*textarea[^>]*>.*?<[ \n\r]*\/textarea[^>]*>)|(<[ \n\r]*style[^>]*>.*?<[ \n\r]*\/style[^>]*>)|(<[ \n\r]*script[^>]*>.*?<[ \n\r]*\/script[^>]*>))/im',
        $html,
        $matches
    );
    $noFormat = $matches[0] ?? []; // do not format these block elements
    foreach ($noFormat as $i => $block) {
        $html = str_replace($block, "\n<!-- ELEMENT $i -->", $html);
    }

    // define box elements for formatting
    $trueBoxElements = 'address|blockquote|center|dir|div|dl|fieldset|form|h1|h2|h3|h4|h5|h6|hr|isindex|menu|noframes|noscript|ol|p|pre|table|ul|article|aside|details|figcaption|figure|footer|header|hgroup|menu|nav|section';
    $functionalBoxElements = 'dd|dt|frameset|li|tbody|td|tfoot|th|thead|tr|colgroup';
    $usableBoxElements = 'applet|button|del|iframe|ins|map|object|script';
    $imagineBoxElements = 'html|body|head|meta|title|link|script|base|!--';
    $allBoxLikeElements = '(?>' . $trueBoxElements . '|' . $functionalBoxElements . '|' . $usableBoxElements . '|' . $imagineBoxElements . ')';
    $esteticBoxLikeElements = '(?>html|head|body|meta name|title|div|table|h1|h2|h3|h4|h5|h6|p|form|pre|center|!--)';
    $structureBoxLikeElements = '(?>html|head|body|div|!--)';

    // Pick the element set that triggers line breaks for the active format
    // type; null (type 1 and anything below 2) means "no box-element breaks".
    $boxElements = null;
    if ($this->formatType == 2) {
        $boxElements = $structureBoxLikeElements; // minimalistic line break
    } elseif ($this->formatType == 3) {
        $boxElements = $esteticBoxLikeElements; // aesthetic line break
    } elseif ($this->formatType >= 4) {
        $boxElements = $allBoxLikeElements; // logical line break
    }

    // Loop-invariant patterns, built once.
    $anyTagPattern = null;
    $closingTagPattern = null;
    $selfClosingTagPattern = null;
    if ($boxElements !== null) {
        $anyTagPattern = '/<' . $boxElements . '(.*)>/Usi';
        $closingTagPattern = '/<\/' . $boxElements . '(.*)>/Usi';
        $selfClosingTagPattern = '/<' . $boxElements . '(.*) \/>/Usi';
    }

    // Tags starting with one of these prefixes do not open a new
    // indentation level (void elements, declarations, comments, "< ").
    $noIndentPrefixes = [
        '< ',
        '<img',
        '<source',
        '<br',
        '<hr',
        '<input',
        '<link',
        '<meta',
        '<col ',
        '<frame',
        '<isindex',
        '<param',
        '<area',
        '<base',
        '<!',
        '<?xml',
    ];

    // split html into it's elements
    $htmlArrayTemp = preg_split(
        '/(<(?:[^<>]+(?:"[^"]*"|\'[^\']*\')?)+>)/',
        $html,
        -1,
        PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY
    );

    if ($htmlArrayTemp === false) {
        return $originalHtml;
    }

    // Replace white-space-only fragments by the configured filler. The
    // leading '' entry keeps the fragment numbering of the original list.
    $htmlArray = [''];
    foreach ($htmlArrayTemp as $fragment) {
        $htmlArray[] = trim($fragment) !== '' ? $fragment : $this->emptySpaceChar;
    }

    // rebuild html
    $html = '';
    $tabs = 0;
    // Fragment handled in the previous iteration; '' for the first one so
    // no out-of-range offset is ever read.
    $previous = '';
    foreach ($htmlArray as $current) {
        $previousIsComment = substr($previous, 0, 4) === '<!--';

        // check if the element should stand in a new line
        $newline = false;
        if (substr($previous, 0, 5) === '<?xml') {
            $newline = true;
        } elseif ($boxElements !== null && (
            // this element has a line break before itself (a self-closing
            // box tag also matches this pattern, so one test suffices)
            preg_match($anyTagPattern, $current)
            // one element before is a element that has a line break after
            || preg_match($closingTagPattern, $previous)
            || $previousIsComment
            || preg_match($selfClosingTagPattern, $previous)
        )) {
            $newline = true;
        }

        // count down a tab
        if (substr($current, 0, 2) === '</') {
            $tabs--;
        }

        // add tabs and line breaks in front of the current tag
        if ($newline) {
            // $tabs can go negative on unbalanced markup; indent by 0 then.
            $html .= $this->newline . str_repeat($this->tab, max(0, $tabs));
        }

        // remove white spaces and line breaks and add current tag to the html-string
        if (substr($previous, 0, 4) === '<pre' // remove white space after line ending in PRE / TEXTAREA / comment
            || substr($previous, 0, 9) === '<textarea'
            || $previousIsComment
        ) {
            $html .= $this->rTrimLines($current);
        } elseif (substr($current, 0, 9) === '<![CDATA[' // remove multiple white space in CDATA / XML
            || substr($current, 0, 5) === '<?xml'
        ) {
            $html .= $this->killWhiteSpace($current);
        } else { // remove all line breaks
            $html .= $this->killLineBreaks($current);
        }

        // count up a tab
        if (substr($current, 0, 1) === '<' && substr($current, 1, 1) !== '/') {
            $opensLevel = true;
            foreach ($noIndentPrefixes as $prefix) {
                if (strncmp($current, $prefix, strlen($prefix)) === 0) {
                    $opensLevel = false;
                    break;
                }
            }
            if ($opensLevel) {
                $tabs++;
            }
        }

        $previous = $current;
    }

    // Remove empty lines
    if ($this->formatType > 1) {
        $this->removeEmptyLines($html);
    }

    // Restore saved comments, styles and java-scripts
    foreach ($noFormat as $i => $block) {
        $block = $this->rTrimLines($block); // remove white space after line ending
        $html = str_replace("<!-- ELEMENT $i -->", $block, $html);
    }

    // include debug comment at the end
    if ($tabs != 0 && $this->debugComment === true) {
        $html .= '<!--' . $tabs . " open elements found-->\r\n";
    }

    return $html;
}
343
|
|
|
|
344
|
|
|
/**
 * Remove ALL line breaks and multiple white space
 *
 * Line breaks are first normalised to $this->newline and then removed;
 * afterwards every run of two or more white space characters is collapsed
 * to a single space.
 *
 * @param string $html
 *
 * @return string
 */
protected function killLineBreaks($html)
{
    $html = $this->convNlOs($html);
    $html = str_replace($this->newline, "", $html);

    $collapsed = preg_replace('/\s\s+/u', ' ', $html);
    if ($collapsed === null) {
        // The /u modifier makes preg_replace() fail (null) on input that is
        // not valid UTF-8. Retry byte-wise so the fragment is not lost.
        $collapsed = preg_replace('/\s\s+/', ' ', $html);
    }

    // Last resort: keep the text with its white space rather than drop it.
    return $collapsed ?? $html;
}
358
|
|
|
|
359
|
|
|
/**
 * Remove multiple white space, keeps line breaks
 *
 * Every line is trimmed, lines that are empty after trimming are dropped,
 * and runs of two or more white space characters inside a line are
 * collapsed to a single space. Remaining lines are joined with
 * $this->newline.
 *
 * @param string $html
 *
 * @return string
 */
protected function killWhiteSpace($html)
{
    $lines = [];
    // foreach over a fixed list: removing entries while looping against a
    // live count() would leave the trailing lines unprocessed.
    foreach (explode($this->newline, $this->convNlOs($html)) as $line) {
        $line = trim($line);
        // Strict comparison so that a line holding just "0" is kept.
        if ($line === '') {
            continue;
        }
        $lines[] = preg_replace('/\s\s+/', ' ', $line);
    }

    return implode($this->newline, $lines);
}
381
|
|
|
|
382
|
|
|
/**
 * Strip trailing white space from every line.
 *
 * Line breaks are normalised to $this->newline first; leading and inner
 * white space as well as the line breaks themselves are preserved.
 *
 * @param string $html
 *
 * @return string
 */
protected function rTrimLines($html)
{
    $lines = explode($this->newline, $this->convNlOs($html));

    return implode($this->newline, array_map('rtrim', $lines));
}
399
|
|
|
|
400
|
|
|
/**
 * Normalise all line breaks to the configured newline sequence.
 *
 * Each "\r\n", "\n" or "\r" is replaced by $this->newline.
 *
 * @param string $html
 *
 * @return string
 */
protected function convNlOs($html)
{
    // The parentheses act as the pattern delimiters here.
    return preg_replace("(\r\n|\n|\r)", $this->newline, $html);
}
412
|
|
|
|
413
|
|
|
/**
 * Strip tabs everywhere and white space around every line.
 *
 * All tab characters are removed, line breaks are converted for the host
 * OS ("\n" to "\r\n" on Windows, "\r\n" to "\n" otherwise), and each line
 * (split on $this->newline) is trimmed on both sides.
 *
 * @param string $html Html-Code, modified in place
 *
 * @return void
 */
protected function trimLines(&$html)
{
    $withoutTabs = str_replace("\t", "", $html);

    // convert newlines according to the current OS
    $normalised = Environment::isWindows()
        ? str_replace("\n", "\r\n", $withoutTabs)
        : str_replace("\r\n", "\n", $withoutTabs);

    $trimmedLines = array_map('trim', explode($this->newline, $normalised));
    $html = implode($this->newline, $trimmedLines);
}
434
|
|
|
|
435
|
|
|
/**
 * Drop every line that is empty or consists of white space only.
 *
 * Lines are split on and re-joined with $this->newline.
 *
 * @param string $html modified in place
 *
 * @return void
 */
protected function removeEmptyLines(&$html)
{
    $nonEmptyLines = array_filter(
        explode($this->newline, $html),
        static function ($line) {
            return trim($line) !== '';
        }
    );

    $html = implode($this->newline, $nonEmptyLines);
}
454
|
|
|
|
455
|
|
|
/**
 * Remove new lines where unnecessary
 * spares line breaks within: pre, textarea, ...
 *
 * The markup is split around complete textarea/pre elements. Text outside
 * of those elements is passed through killLineBreaks(); the elements
 * themselves are copied verbatim. If splitting fails, $html is left
 * unchanged.
 *
 * @param string $html modified in place
 *
 * @return void
 */
protected function removeNewLines(&$html)
{
    $splitArray = [
        'textarea',
        'pre'
    ]; // possibly also: span, script, style

    $pieces = preg_split(
        '#(<(' . implode('|', $splitArray) . ').*>.*</\2>)#Uis',
        $html,
        -1,
        PREG_SPLIT_DELIM_CAPTURE
    );
    if ($pieces === false) {
        // Regex engine error: keep the input instead of wiping it.
        return;
    }

    // With two capture groups the list repeats in groups of three:
    // [outside text, protected element, tag name, outside text, ...].
    $result = '';
    foreach ($pieces as $index => $piece) {
        switch ($index % 3) {
            case 0:
                $result .= $this->killLineBreaks($piece);
                break;
            case 1:
                $result .= $piece;
                break;
            default:
                // Captured tag name only; already part of the element.
                break;
        }
    }

    $html = $result;
}
478
|
|
|
|
479
|
|
|
/**
 * Strip <link rel="schema.dc" ...> tags (quotes around the rel value are
 * optional, matching is case-insensitive).
 *
 * @param string $html modified in place
 *
 * @return void
 */
protected function removeLinkSchema(&$html)
{
    $schemaLinkPattern = '/<link rel="?schema.dc"?.+?>/is';
    $html = preg_replace($schemaLinkPattern, '', $html);
}
490
|
|
|
|
491
|
|
|
/**
 * Delete every literal occurrence of alt="" from the markup.
 *
 * @param string $html modified in place
 *
 * @return void
 */
protected function removeEmptyAltAtr(&$html)
{
    $emptyAltAttribute = 'alt=""';
    $html = str_replace($emptyAltAttribute, '', $html);
}
502
|
|
|
|
503
|
|
|
/**
 * Replace the broken root link href=".html" by an empty href.
 *
 * @param string $html modified in place
 *
 * @return void
 */
protected function removeRealUrlBrokenRootLink(&$html)
{
    $brokenHref = 'href=".html"';
    $emptyHref = 'href=""';
    $html = str_replace($brokenHref, $emptyHref, $html);
}
514
|
|
|
|
515
|
|
|
/**
 * Insert the configured header comment into the HTML.
 *
 * The comment is placed on a new line, indented by two tab units, directly
 * after the first "<meta http-equiv ...>" tag. Nothing happens when no
 * header comment is configured.
 *
 * @param string $html modified in place
 *
 * @return void
 */
public function includeHeaderComment(&$html)
{
    if (empty($this->headerComment)) {
        return;
    }

    $separator = $this->newline . $this->tab . $this->tab;
    $comment = '<!-- ' . $this->headerComment . '-->';

    $html = preg_replace_callback(
        '/<meta http-equiv(.*)>/Usi',
        function ($matches) use ($separator, $comment) {
            return trim($matches[0] . $separator . $comment);
        },
        $html,
        1 // only the first meta tag receives the comment
    );
}
528
|
|
|
} |
529
|
|
|
|
If the size of the collection does not change during the iteration, it is generally a good practice to compute it beforehand, and not on each iteration: