|
1
|
|
|
<?php |
|
2
|
|
|
namespace TYPO3\CMS\Core\Html; |
|
3
|
|
|
|
|
4
|
|
|
/* |
|
5
|
|
|
* This file is part of the TYPO3 CMS project. |
|
6
|
|
|
* |
|
7
|
|
|
* It is free software; you can redistribute it and/or modify it under |
|
8
|
|
|
* the terms of the GNU General Public License, either version 2 |
|
9
|
|
|
* of the License, or any later version. |
|
10
|
|
|
* |
|
11
|
|
|
* For the full copyright and license information, please read the |
|
12
|
|
|
* LICENSE.txt file that was distributed with this source code. |
|
13
|
|
|
* |
|
14
|
|
|
* The TYPO3 project - inspiring people to share! |
|
15
|
|
|
*/ |
|
16
|
|
|
|
|
17
|
|
|
use TYPO3\CMS\Core\Utility\GeneralUtility; |
|
18
|
|
|
use TYPO3\CMS\Core\Utility\MathUtility; |
|
19
|
|
|
use TYPO3\CMS\Frontend\ContentObject\ContentObjectRenderer; |
|
20
|
|
|
|
|
21
|
|
|
/** |
|
22
|
|
|
* Functions for parsing HTML. |
|
23
|
|
|
* You are encouraged to use this class in your own applications |
|
24
|
|
|
*/ |
|
25
|
|
|
class HtmlParser |
|
26
|
|
|
{ |
|
27
|
|
|
/**
 * Lookup cache kept on the parser instance.
 * NOTE(review): presumably filled by caseShift(), which is defined outside this excerpt - confirm.
 *
 * @var array
 */
protected $caseShift_cache = [];

// Void elements that do not have closing tags, as defined by HTML5, except link element.
// The names are joined with "|" so the constant can be dropped into a regular expression as an alternation.
const VOID_ELEMENTS = 'area|base|br|col|command|embed|hr|img|input|keygen|meta|param|source|track|wbr';
|
34
|
|
|
|
|
35
|
|
|
/************************************ |
|
36
|
|
|
* |
|
37
|
|
|
* Parsing HTML code |
|
38
|
|
|
* |
|
39
|
|
|
************************************/ |
|
40
|
|
|
/**
 * Cuts $content into alternating "outside" and "block" segments for the given tag names.
 *
 * For well-formed input, even indexes of the result hold the text between blocks and odd indexes
 * hold one complete block, including its opening and closing tag. Occurrences of the listed tags
 * that are nested inside a block stay part of that outer block.
 * Use ->removeFirstAndLastTag() to strip the wrapping tags of a block if needed.
 *
 * @param string $tag Comma separated list of tag names.
 * @param string $content HTML content to split.
 * @param bool $eliminateExtraEndTags If set, end tags that have no open start tag are left out of the result - you should probably set this in most cases.
 * @return array Even indexes are outside the blocks, odd indexes are block content.
 * @see splitTags(), removeFirstAndLastTag()
 */
public function splitIntoBlock($tag, $content, $eliminateExtraEndTags = false)
{
    $quotedNames = array_map(
        function ($name) {
            return preg_quote($name, '/');
        },
        array_unique(GeneralUtility::trimExplode(',', $tag, true))
    );
    $pattern = '/\\<\\/?(' . implode('|', $quotedNames) . ')(\\s*\\>|\\s[^\\>]*\\>)/si';
    $segments = preg_split($pattern, $content);

    $result = [];
    // Text collected for the segment that is currently being built
    $current = $segments[0];
    // Byte position in $content of the next matched tag
    $offset = strlen($segments[0]);
    // Number of currently open blocks
    $depth = 0;

    // Each remaining segment is the text that follows one matched tag.
    // The tag itself is not part of the split result, so it is read back from $content at $offset.
    foreach (array_slice($segments, 1) as $trailingText) {
        $closing = substr($content, $offset, 2) === '</';
        $tagLength = strcspn(substr($content, $offset), '>') + 1;

        if ($closing) {
            $depth--;
            $dropped = $eliminateExtraEndTags && $depth < 0;
            if ($dropped) {
                // Surplus end tag: stay on ground level and leave the tag out of the buffer
                $depth = 0;
            } else {
                $current .= substr($content, $offset, $tagLength);
            }
            $offset += $tagLength;
            // Back on ground level by a real end tag: the block is complete
            if ($depth === 0 && !$dropped) {
                $result[] = $current;
                $current = '';
            }
            $chunk = substr($content, $offset, strlen($trailingText));
        } else {
            // A start tag on ground level closes the preceding "outside" segment
            if ($depth === 0) {
                $result[] = $current;
                $current = '';
            }
            $depth++;
            // Start tag and the text following it are taken over in one go
            $chunk = substr($content, $offset, strlen($trailingText) + $tagLength);
        }

        $offset += strlen($chunk);
        $current .= $chunk;
    }

    $result[] = $current;
    return $result;
}
|
110
|
|
|
|
|
111
|
|
|
/**
 * Splits content into blocks *recursively* and runs call back methods on the tags and on the content between them.
 *
 * The content is split with splitIntoBlock() (surplus end tags eliminated). Text outside blocks is passed
 * to the content call back. Each block is taken apart into start tag, end tag, lowercase tag name and inner
 * content; the inner content is processed by a recursive call with $level + 1 before the tag call back is invoked.
 *
 * @param string $tag Tag list, see splitIntoBlock()
 * @param string $content Content, see splitIntoBlock()
 * @param object $procObj Object where call back methods are.
 * @param string $callBackContent Name of call back method for content; "function callBackContent($str,$level)". Skipped if empty.
 * @param string $callBackTags Name of call back method for tags; "function callBackTags($tags,$level)". Skipped if empty.
 * @param int $level Indent level
 * @return string Processed content
 * @see splitIntoBlock()
 */
public function splitIntoBlockRecursiveProc($tag, $content, &$procObj, $callBackContent, $callBackTags, $level = 0)
{
    $output = '';
    foreach ($this->splitIntoBlock($tag, $content, true) as $index => $segment) {
        if ($index % 2 === 0) {
            // Even index: text outside of any block
            $output .= $callBackContent ? $procObj->{$callBackContent}($segment, $level) : $segment;
            continue;
        }

        // Odd index: a complete block, processed from the inside out
        $tagName = $this->getFirstTagName($segment, true);
        $block = [
            'tag_start' => $this->getFirstTag($segment),
            'tag_end' => '</' . $tagName . '>',
            'tag_name' => strtolower($tagName),
            'content' => $this->splitIntoBlockRecursiveProc(
                $tag,
                $this->removeFirstAndLastTag($segment),
                $procObj,
                $callBackContent,
                $callBackTags,
                $level + 1
            ),
        ];
        if ($callBackTags) {
            // The call back receives the whole description and returns the (possibly changed) description
            $block = $procObj->{$callBackTags}($block, $level);
        }
        $output .= $block['tag_start'] . $block['content'] . $block['tag_end'];
    }
    return $output;
}
|
146
|
|
|
|
|
147
|
|
|
/**
 * Splits $content at every opening (or self-closing) occurrence of the listed tags.
 *
 * Even indexes of the result hold the text between the matched tags, odd indexes hold the matched tag
 * itself (from "<" up to and including the next ">"). End tags are not matched and stay in the text.
 *
 * @param string $tag Comma separated list of tag names.
 * @param string $content HTML content to split.
 * @return array Even indexes are the content between tags, odd indexes are the tags.
 * @see splitIntoBlock(), removeFirstAndLastTag()
 */
public function splitTags($tag, $content)
{
    $quotedNames = array_map(
        function ($name) {
            return preg_quote($name, '/');
        },
        GeneralUtility::trimExplode(',', $tag, true)
    );
    $pattern = '/\\<(' . implode('|', $quotedNames) . ')(\\s[^>]*)?\\/?>/si';
    $pieces = preg_split($pattern, $content);

    $result = [$pieces[0]];
    // Byte position in $content of the next matched tag
    $offset = strlen($pieces[0]);

    // preg_split() drops the tags, so each one is read back from $content before the text that follows it
    foreach (array_slice($pieces, 1) as $textAfterTag) {
        $matchedTag = substr($content, $offset, strcspn(substr($content, $offset), '>') + 1);
        $result[] = $matchedTag;
        $result[] = $textAfterTag;
        $offset += strlen($matchedTag) + strlen($textAfterTag);
    }
    return $result;
}
|
184
|
|
|
|
|
185
|
|
|
/**
 * Removes the first and last tag in the string.
 * Anything before the first and after the last tag respectively is also removed.
 *
 * The first tag is taken to end at the first ">" and the last tag to start at the last "<".
 * If one of the two characters is missing, or the last "<" does not come after the first ">"
 * (e.g. the string consists of a single tag), there is no enclosed content and an empty string is returned.
 *
 * @param string $str String to process
 * @return string The content between the first and the last tag, or an empty string
 */
public function removeFirstAndLastTag($str)
{
    $str = (string)$str;
    // End of first tag:
    $firstTagEnd = strpos($str, '>');
    // Begin of last tag:
    $lastTagStart = strrpos($str, '<');
    // Guard against FALSE positions leaking into the offset arithmetic below
    if ($firstTagEnd === false || $lastTagStart === false || $lastTagStart <= $firstTagEnd) {
        return '';
    }
    return substr($str, $firstTagEnd + 1, $lastTagStart - $firstTagEnd - 1);
}
|
201
|
|
|
|
|
202
|
|
|
/**
 * Returns the first tag in $str.
 * The result is everything from the start of $str up to and including the first ">",
 * so make sure the tag is the first thing in the string.
 *
 * @param string $str HTML string with tags
 * @return string The leading part of $str ending with ">", or an empty string if $str contains no ">"
 */
public function getFirstTag($str)
{
    $closingBracket = strpos($str, '>');
    if ($closingBracket === false) {
        return '';
    }
    return substr($str, 0, $closingBracket + 1);
}
|
215
|
|
|
|
|
216
|
|
|
/**
 * Returns the NAME of the first tag in $str.
 *
 * $str must start (after optional whitespace) with "<"; the name is every character after it
 * up to the first whitespace character or ">". For an end tag the leading "/" is part of the result.
 *
 * @param string $str HTML tag
 * @param bool $preserveCase If set, the name is returned as written instead of being converted to upper case.
 * @return string Tag name (upper case unless $preserveCase is set), or an empty string if $str does not start with a tag
 * @see getFirstTag()
 */
public function getFirstTagName($str, $preserveCase = false)
{
    $found = [];
    if (preg_match('/^\\s*\\<([^\\s\\>]+)(\\s|\\>)/', $str, $found) !== 1) {
        return '';
    }
    return $preserveCase ? $found[1] : strtoupper($found[1]);
}
|
235
|
|
|
|
|
236
|
|
|
/**
 * Returns an array with all attributes as keys. Attribute names are lower-cased and reduced to
 * alphanumeric characters, "_", ":" and "-".
 * If an attribute is empty (shorthand), then the value for the key is empty. You can check if it existed with isset()
 *
 * Compared to the method in GeneralUtility::get_tag_attributes this method also returns meta data about each
 * attribute, e.g. if it is a shorthand attribute, and what the quotation is. Also, since all attribute keys
 * are lower-cased, the meta information contains the original attribute name.
 *
 * Meta data per attribute: 'origTag' is the attribute name as written, 'dashType' is the quote character
 * of the value ('"', "'" or '' for unquoted values); 'dashType' is only present if a value was assigned.
 *
 * @param string $tag Tag: $tag is either a whole tag (eg '<TAG OPTION ATTRIB=VALUE>') or the parameterlist (ex ' OPTION ATTRIB=VALUE>')
 * @param bool $deHSC If set, the attribute values are de-htmlspecialchar'ed. Should actually always be set!
 * @return array array(Tag attributes,Attribute meta-data) - always two arrays, both empty if nothing was found
 */
public function get_tag_attributes($tag, $deHSC = false)
{
    list($components, $metaC) = $this->split_tag_attributes($tag);
    $attributes = [];
    $attributesMeta = [];
    if (!is_array($components)) {
        // Nothing to analyse: still return the documented pair of arrays instead of an implicit NULL
        return [$attributes, $attributesMeta];
    }
    // Attribute name is stored here while it waits for its value
    $name = '';
    // TRUE right after a "=" component, i.e. when the next component is a value
    $valuemode = false;
    foreach ($components as $key => $val) {
        if ($val === '=') {
            $valuemode = true;
            continue;
        }
        if ($valuemode) {
            // Only if $name is set (an attribute is waiting for a value) the value is assigned
            if ($name) {
                $attributes[$name] = $deHSC ? htmlspecialchars_decode($val) : $val;
                $attributesMeta[$name]['dashType'] = $metaC[$key];
                $name = '';
            }
        } elseif ($namekey = preg_replace('/[^[:alnum:]_\\:\\-]/', '', $val)) {
            // A new attribute name: register it as shorthand until a value shows up
            $name = strtolower($namekey);
            $attributesMeta[$name] = [];
            $attributesMeta[$name]['origTag'] = $namekey;
            $attributes[$name] = '';
        }
        $valuemode = false;
    }
    return [$attributes, $attributesMeta];
}
|
282
|
|
|
|
|
283
|
|
|
/**
 * Returns an array with the 'components' from an attribute list.
 * The result is normally analyzed by get_tag_attributes
 * Removes tag-name if found.
 *
 * A component is a quoted value (returned without its quotes), an unquoted word or a single "=".
 * The second returned array runs parallel to the first and holds the quote character that enclosed
 * the component ('"' or "'"), or an empty string for unquoted components.
 *
 * The difference between this method and the one in GeneralUtility is that this method actually determines
 * more information on the attribute, e.g. if the value is enclosed by a " or ' character.
 *
 * @param string $tag The tag or attributes
 * @return array array(components, quote characters of the components)
 * @access private
 * @see \TYPO3\CMS\Core\Utility\GeneralUtility::split_tag_attributes()
 */
public function split_tag_attributes($tag)
{
    $tagMatch = [];
    // Group 2 is the attribute list without the leading "<name " and the trailing ">"
    if (preg_match('/(\\<[^\\s]+\\s+)?(.*?)\\s*(\\>)?$/s', $tag, $tagMatch) !== 1) {
        return [[], []];
    }

    $components = [];
    $quoteChars = [];
    $tokens = [];
    $tokenCount = preg_match_all('/("[^"]*"|\'[^\']*\'|[^\\s"\'\\=]+|\\=)/s', $tagMatch[2], $tokens);
    if ($tokenCount > 0) {
        foreach ($tokens[1] as $token) {
            $isQuoted = in_array($token[0], ['"', '\''], true);
            $quoteChars[] = $isQuoted ? $token[0] : '';
            // Quoted tokens lose their enclosing quote characters
            $components[] = $isQuoted ? substr($token, 1, -1) : $token;
        }
    }
    return [$components, $quoteChars];
}
|
321
|
|
|
|
|
322
|
|
|
/********************************* |
|
323
|
|
|
* |
|
324
|
|
|
* Clean HTML code |
|
325
|
|
|
* |
|
326
|
|
|
*********************************/ |
|
327
|
|
|
/**
 * Function that can clean up HTML content according to configuration given in the $tags array.
 *
 * Initializing the $tags array to allow a list of tags (in this case <B>,<I>,<U> and <A>), set it like this: $tags = array_flip(explode(',','b,a,i,u'))
 * If the value of the $tags[$tagname] entry is an array, advanced processing of the tags is initialized. These are the options:
 *
 * $tags[$tagname] = Array(
 * 'overrideAttribs' => '' If set, this string is preset as the attributes of the tag
 * 'allowedAttribs' => '0' (zero) = no attributes allowed, '[commalist of attributes]' = only allowed attributes. If blank, all attributes are allowed.
 * 'fixAttrib' => Array(
 * '[attribute name]' => Array (
 * 'set' => Force the attribute value to this value.
 * 'unset' => Boolean: If set, the attribute is unset.
 * 'default' => If no attribute exists by this name, this value is set as default value (if this value is not blank)
 * 'always' => Boolean. If set, the attribute is always processed. Normally an attribute is processed only if it exists
 * 'trim,intval,lower,upper' => All booleans. If any of these keys are set, the value is passed through the respective PHP-functions.
 * 'range' => Array ('[low limit]','[high limit, optional]') Setting integer range.
 * 'list' => Array ('[value1/default]','[value2]','[value3]') Attribute must be in this list. If not, the value is set to the first element.
 * 'removeIfFalse' => Boolean/'blank'. If set, then the attribute is removed if it is 'FALSE'. If this value is set to 'blank' then the value must be a blank string (that means a 'zero' value will not be removed)
 * 'removeIfEquals' => [value] If the attribute value matches the value set here, then it is removed.
 * 'casesensitiveComp' => 1 If set, then the removeIfEquals and list comparisons will be case sensitive. Otherwise not.
 * 'prefixLocalAnchors' => If set, values starting with "#" are prefixed with the request URL (value 2: with the URL returned by ContentObjectRenderer::getUrlToCurrentLocation())
 * 'prefixRelPathWith' => String. Prefix for values that have no URL scheme and do not start with "/"
 * 'userFunc' => User function the attribute value is passed through
 * )
 * ),
 * 'protect' => '', Boolean. If set, the tag <> is converted to &lt; and &gt;
 * 'remap' => '', String. If set, the tagname is remapped to this tagname
 * 'rmTagIfNoAttrib' => '', Boolean. If set, then the tag is removed if no attributes happened to be there.
 * 'nesting' => '', Boolean/'global'. If set TRUE, then this tag must have starting and ending tags in the correct order. Any tags not in this order will be discarded. Thus '</B><B><I></B></I></B>' will be converted to '<B><I></B></I>'. Is the value 'global' then true nesting in relation to other tags marked for 'global' nesting control is preserved. This means that if <B> and <I> are set for global nesting then this string '</B><B><I></B></I></B>' is converted to '<B></B>'
 * )
 *
 * Comments ("<!-- ... -->") and script-style CDATA sections ("/*<![CDATA[*" . "/ ... /*]]>*" . "/") are passed through untouched.
 *
 * @param string $content Is the HTML-content being processed. This is also the result being returned.
 * @param array $tags Is an array where each key is a tagname in lowercase. Only tags present as keys in this array are preserved. The value of the key can be an array with a vast number of options to configure.
 * @param mixed $keepAll Boolean/'protect', if set, then all tags are kept regardless of tags present as keys in $tags-array. If 'protect' then the preserved tags have their <> converted to &lt; and &gt;
 * @param int $hSC Values -1,0,1,2: Set to zero= disabled, set to 1 then the content BETWEEN tags is htmlspecialchar()'ed, set to -1 its the opposite and set to 2 the content will be HSC'ed BUT with preservation for real entities (eg. "&amp;" or "&#234;")
 * @param array $addConfig Configuration array send along as $conf to the internal functions
 * @return string Processed HTML content
 */
public function HTMLcleaner($content, $tags = [], $keepAll = 0, $hSC = 0, $addConfig = [])
{
    // End marker of a CDATA section and its length; the length is needed to cut exactly behind the marker
    $cdataEndMarker = '/*]]>*/';
    $cdataEndLength = strlen($cdataEndMarker);
    $newContent = [];
    // Every token (except the first) is the text that followed one "<" in the content
    $tokArr = explode('<', $content);
    $newContent[] = $this->bidir_htmlspecialchars(current($tokArr), $hSC);
    // We skip the first element in foreach loop
    $tokArrSliced = array_slice($tokArr, 1, null, true);
    // Next free index in $newContent; indexes of start tags are remembered in $tagRegister
    $c = 1;
    // Per tag name: positions in $newContent of start tags that are still open (nesting control)
    $tagRegister = [];
    // Open tags with 'global' nesting control, in the order they were opened
    $tagStack = [];
    $inComment = false;
    $inCdata = false;
    // Set when the rest of a token follows a comment/CDATA end and therefore does not start with a tag
    $skipTag = false;
    foreach ($tokArrSliced as $tok) {
        if ($inComment) {
            if (($eocPos = strpos($tok, '-->')) === false) {
                // End of comment is not found in the token. Go further until end of comment is found in other tokens.
                $newContent[$c++] = '<' . $tok;
                continue;
            }
            // Comment ends in the middle of the token: add comment and proceed with rest of the token
            $newContent[$c++] = '<' . substr($tok, 0, ($eocPos + 3));
            $tok = substr($tok, $eocPos + 3);
            $inComment = false;
            $skipTag = true;
        } elseif ($inCdata) {
            if (($eocPos = strpos($tok, $cdataEndMarker)) === false) {
                // End of CDATA is not found in the token. Go further until end of CDATA is found in other tokens.
                $newContent[$c++] = '<' . $tok;
                continue;
            }
            // CDATA ends in the middle of the token: add CDATA and proceed with rest of the token
            $newContent[$c++] = '<' . substr($tok, 0, $eocPos + $cdataEndLength);
            $tok = substr($tok, $eocPos + $cdataEndLength);
            $inCdata = false;
            $skipTag = true;
        } elseif (substr($tok, 0, 3) === '!--') {
            if (($eocPos = strpos($tok, '-->')) === false) {
                // Comment started in this token but it does not end in the same token. Set a flag to skip till the end of comment
                $newContent[$c++] = '<' . $tok;
                $inComment = true;
                continue;
            }
            // Start and end of comment are both in the current token. Add comment and proceed with rest of the token
            $newContent[$c++] = '<' . substr($tok, 0, ($eocPos + 3));
            $tok = substr($tok, $eocPos + 3);
            $skipTag = true;
        } elseif (substr($tok, 0, 10) === '![CDATA[*/') {
            if (($eocPos = strpos($tok, $cdataEndMarker)) === false) {
                // CDATA started in this token but it does not end in the same token. Set a flag to skip till the end of CDATA
                $newContent[$c++] = '<' . $tok;
                $inCdata = true;
                continue;
            }
            // Start and end of CDATA are both in the current token. Add CDATA and proceed with rest of the token
            $newContent[$c++] = '<' . substr($tok, 0, $eocPos + $cdataEndLength);
            $tok = substr($tok, $eocPos + $cdataEndLength);
            $skipTag = true;
        }
        // The token can be empty (e.g. "<<" or a "<" at the very end), so do not read offset 0 blindly
        $firstChar = (string)$tok !== '' ? $tok[0] : '';
        // It is a tag... (first char is a-z0-9 or /) (fixed 19/01 2004). This also avoids triggering on <?xml..> and <!DOCTYPE..>
        if (!$skipTag && preg_match('/[[:alnum:]\\/]/', $firstChar) == 1) {
            $tagEnd = strpos($tok, '>');
            // If there is and end-bracket... tagEnd can't be 0 as the first character can't be a >
            if ($tagEnd) {
                $endTag = $firstChar === '/' ? 1 : 0;
                $tagContent = substr($tok, $endTag, $tagEnd - $endTag);
                // [0] is the tag name, [1] (if present) the raw attribute string
                $tagParts = preg_split('/\\s+/s', $tagContent, 2);
                $tagName = strtolower($tagParts[0]);
                $emptyTag = 0;
                if (isset($tags[$tagName])) {
                    // If there is processing to do for the tag:
                    if (is_array($tags[$tagName])) {
                        // Void elements are written with a trailing " /"
                        if (preg_match('/^(' . self::VOID_ELEMENTS . ')$/i', $tagName)) {
                            $emptyTag = 1;
                        }
                        // If NOT an endtag, do attribute processing (added dec. 2003)
                        if (!$endTag) {
                            // Override attributes
                            if ((string)$tags[$tagName]['overrideAttribs'] !== '') {
                                $tagParts[1] = $tags[$tagName]['overrideAttribs'];
                            }
                            // Allowed tags
                            if ((string)$tags[$tagName]['allowedAttribs'] !== '') {
                                // No attribs allowed
                                if ((string)$tags[$tagName]['allowedAttribs'] === '0') {
                                    $tagParts[1] = '';
                                } elseif (trim($tagParts[1])) {
                                    $tagAttrib = $this->get_tag_attributes($tagParts[1]);
                                    $tagParts[1] = '';
                                    $newTagAttrib = [];
                                    if (!($tList = $tags[$tagName]['_allowedAttribs'])) {
                                        // Just explode attribts for tag once
                                        $tList = ($tags[$tagName]['_allowedAttribs'] = GeneralUtility::trimExplode(',', strtolower($tags[$tagName]['allowedAttribs']), true));
                                    }
                                    foreach ($tList as $allowTag) {
                                        if (isset($tagAttrib[0][$allowTag])) {
                                            $newTagAttrib[$allowTag] = $tagAttrib[0][$allowTag];
                                        }
                                    }
                                    $tagParts[1] = $this->compileTagAttribs($newTagAttrib, $tagAttrib[1]);
                                }
                            }
                            // Fixed attrib values
                            if (is_array($tags[$tagName]['fixAttrib'])) {
                                $tagAttrib = $this->get_tag_attributes($tagParts[1]);
                                $tagParts[1] = '';
                                foreach ($tags[$tagName]['fixAttrib'] as $attr => $params) {
                                    if (isset($params['set']) && $params['set'] !== '') {
                                        $tagAttrib[0][$attr] = $params['set'];
                                    }
                                    if (!empty($params['unset'])) {
                                        unset($tagAttrib[0][$attr]);
                                    }
                                    if (!isset($tagAttrib[0][$attr]) && (string)$params['default'] !== '') {
                                        $tagAttrib[0][$attr] = $params['default'];
                                    }
                                    if ($params['always'] || isset($tagAttrib[0][$attr])) {
                                        if ($params['trim']) {
                                            $tagAttrib[0][$attr] = trim($tagAttrib[0][$attr]);
                                        }
                                        if ($params['intval']) {
                                            $tagAttrib[0][$attr] = (int)$tagAttrib[0][$attr];
                                        }
                                        if ($params['lower']) {
                                            $tagAttrib[0][$attr] = strtolower($tagAttrib[0][$attr]);
                                        }
                                        if ($params['upper']) {
                                            $tagAttrib[0][$attr] = strtoupper($tagAttrib[0][$attr]);
                                        }
                                        if ($params['range']) {
                                            if (isset($params['range'][1])) {
                                                $tagAttrib[0][$attr] = MathUtility::forceIntegerInRange($tagAttrib[0][$attr], (int)$params['range'][0], (int)$params['range'][1]);
                                            } else {
                                                $tagAttrib[0][$attr] = MathUtility::forceIntegerInRange($tagAttrib[0][$attr], (int)$params['range'][0]);
                                            }
                                        }
                                        if (is_array($params['list'])) {
                                            // For the class attribute, remove from the attribute value any class not in the list
                                            // Classes are case sensitive
                                            if ($attr === 'class') {
                                                $newClasses = [];
                                                $classes = GeneralUtility::trimExplode(' ', $tagAttrib[0][$attr], true);
                                                foreach ($classes as $class) {
                                                    if (in_array($class, $params['list'])) {
                                                        $newClasses[] = $class;
                                                    }
                                                }
                                                if (!empty($newClasses)) {
                                                    $tagAttrib[0][$attr] = implode(' ', $newClasses);
                                                } else {
                                                    $tagAttrib[0][$attr] = $params['list'][0];
                                                }
                                            } else {
                                                if (!in_array($this->caseShift($tagAttrib[0][$attr], $params['casesensitiveComp']), $this->caseShift($params['list'], $params['casesensitiveComp'], $tagName))) {
                                                    $tagAttrib[0][$attr] = $params['list'][0];
                                                }
                                            }
                                        }
                                        if ($params['removeIfFalse'] && $params['removeIfFalse'] !== 'blank' && !$tagAttrib[0][$attr] || $params['removeIfFalse'] === 'blank' && (string)$tagAttrib[0][$attr] === '') {
                                            unset($tagAttrib[0][$attr]);
                                        }
                                        if ((string)$params['removeIfEquals'] !== '' && $this->caseShift($tagAttrib[0][$attr], $params['casesensitiveComp']) === $this->caseShift($params['removeIfEquals'], $params['casesensitiveComp'])) {
                                            unset($tagAttrib[0][$attr]);
                                        }
                                        if ($params['prefixLocalAnchors']) {
                                            if ($tagAttrib[0][$attr][0] === '#') {
                                                if ($params['prefixLocalAnchors'] == 2) {
                                                    /** @var ContentObjectRenderer $contentObjectRenderer */
                                                    $contentObjectRenderer = GeneralUtility::makeInstance(ContentObjectRenderer::class);
                                                    $prefix = $contentObjectRenderer->getUrlToCurrentLocation();
                                                } else {
                                                    $prefix = GeneralUtility::getIndpEnv('TYPO3_REQUEST_URL');
                                                }
                                                $tagAttrib[0][$attr] = $prefix . $tagAttrib[0][$attr];
                                            }
                                        }
                                        if ($params['prefixRelPathWith']) {
                                            $urlParts = parse_url($tagAttrib[0][$attr]);
                                            if (!$urlParts['scheme'] && $urlParts['path'][0] !== '/') {
                                                // If it is NOT an absolute URL (by http: or starting "/")
                                                $tagAttrib[0][$attr] = $params['prefixRelPathWith'] . $tagAttrib[0][$attr];
                                            }
                                        }
                                        if ($params['userFunc']) {
                                            if (is_array($params['userFunc.'])) {
                                                $params['userFunc.']['attributeValue'] = $tagAttrib[0][$attr];
                                            } else {
                                                $params['userFunc.'] = $tagAttrib[0][$attr];
                                            }
                                            $tagAttrib[0][$attr] = GeneralUtility::callUserFunction($params['userFunc'], $params['userFunc.'], $this);
                                        }
                                    }
                                }
                                $tagParts[1] = $this->compileTagAttribs($tagAttrib[0], $tagAttrib[1]);
                            }
                        } else {
                            // If endTag, remove any possible attributes:
                            $tagParts[1] = '';
                        }
                        // Protecting the tag by converting < and > to &lt; and &gt; ??
                        if ($tags[$tagName]['protect']) {
                            $lt = '&lt;';
                            $gt = '&gt;';
                        } else {
                            $lt = '<';
                            $gt = '>';
                        }
                        // Remapping tag name?
                        if ($tags[$tagName]['remap']) {
                            $tagParts[0] = $tags[$tagName]['remap'];
                        }
                        // rmTagIfNoAttrib
                        if ($endTag || trim($tagParts[1]) || !$tags[$tagName]['rmTagIfNoAttrib']) {
                            $setTag = 1;
                            // Remove this closing tag if $tagName was among $TSconfig['removeTags']
                            if ($endTag && $tags[$tagName]['allowedAttribs'] === 0 && $tags[$tagName]['rmTagIfNoAttrib'] === 1) {
                                $setTag = 0;
                            }
                            if ($tags[$tagName]['nesting']) {
                                if (!is_array($tagRegister[$tagName])) {
                                    $tagRegister[$tagName] = [];
                                }
                                if ($endTag) {
                                    $correctTag = 1;
                                    if ($tags[$tagName]['nesting'] === 'global') {
                                        $lastEl = end($tagStack);
                                        if ($tagName !== $lastEl) {
                                            if (in_array($tagName, $tagStack)) {
                                                // Drop the start tags of everything that was opened after this tag and is still open
                                                while (!empty($tagStack) && $tagName !== $lastEl) {
                                                    $elPos = end($tagRegister[$lastEl]);
                                                    unset($newContent[$elPos]);
                                                    array_pop($tagRegister[$lastEl]);
                                                    array_pop($tagStack);
                                                    $lastEl = end($tagStack);
                                                }
                                            } else {
                                                // In this case the end tag has no open start tag on the stack, so it is discarded
                                                $correctTag = 0;
                                            }
                                        }
                                    }
                                    if (empty($tagRegister[$tagName]) || !$correctTag) {
                                        $setTag = 0;
                                    } else {
                                        array_pop($tagRegister[$tagName]);
                                        if ($tags[$tagName]['nesting'] === 'global') {
                                            array_pop($tagStack);
                                        }
                                    }
                                } else {
                                    // Remember where the start tag is written, so it can be removed if it is never closed
                                    $tagRegister[$tagName][] = $c;
                                    if ($tags[$tagName]['nesting'] === 'global') {
                                        $tagStack[] = $tagName;
                                    }
                                }
                            }
                            if ($setTag) {
                                // Setting the tag
                                $newContent[$c++] = $lt . ($endTag ? '/' : '') . trim($tagParts[0] . ' ' . $tagParts[1]) . ($emptyTag ? ' /' : '') . $gt;
                            }
                        }
                    } else {
                        // Tag is allowed without further configuration: keep it as it is
                        $newContent[$c++] = '<' . ($endTag ? '/' : '') . $tagContent . '>';
                    }
                } elseif ($keepAll) {
                    // This is if the tag was not defined in the array for processing:
                    if ($keepAll === 'protect') {
                        $lt = '&lt;';
                        $gt = '&gt;';
                    } else {
                        $lt = '<';
                        $gt = '>';
                    }
                    $newContent[$c++] = $lt . ($endTag ? '/' : '') . $tagContent . $gt;
                }
                // Text following the tag
                $newContent[$c++] = $this->bidir_htmlspecialchars(substr($tok, $tagEnd + 1), $hSC);
            } else {
                // No end-bracket: treat the "<" and the token as plain text
                $newContent[$c++] = $this->bidir_htmlspecialchars('<' . $tok, $hSC);
            }
        } else {
            $newContent[$c++] = $this->bidir_htmlspecialchars(($skipTag ? '' : '<') . $tok, $hSC);
            // It was not a tag anyways
            $skipTag = false;
        }
    }
    // Unsetting tags: start tags under nesting control that were never closed
    foreach ($tagRegister as $tag => $positions) {
        foreach ($positions as $pKey) {
            unset($newContent[$pKey]);
        }
    }
    $newContent = implode('', $newContent);
    $newContent = $this->stripEmptyTagsIfConfigured($newContent, $addConfig);
    return $newContent;
}
|
659
|
|
|
|
|
660
|
|
|
/**
 * Encodes or decodes HTML special characters, depending on a direction flag.
 *
 * @param string $value Input value
 * @param int $dir Direction: 1 encodes, 2 encodes without re-encoding existing entities,
 *                 -1 decodes. Any other value returns the input untouched.
 * @return string Output value
 */
public function bidir_htmlspecialchars($value, $dir)
{
    $direction = (int)$dir;
    if ($direction === 1) {
        return htmlspecialchars($value);
    }
    if ($direction === 2) {
        // The fourth argument (double_encode = false) leaves existing entities as they are.
        return htmlspecialchars($value, ENT_COMPAT, 'UTF-8', false);
    }
    if ($direction === -1) {
        return htmlspecialchars_decode($value);
    }
    return $value;
}
|
680
|
|
|
|
|
681
|
|
|
/**
 * Prefixes relative resource paths found in selected tags and in <style> blocks.
 *
 * Attributes handled: "background" (td, body, table), "src" (img, input, script, embed),
 * "href" (link, a), "action" (form) and "value" of <param name="movie">. Each of these is
 * passed through ->prefixRelPath(). Inside <style> blocks the content of every url(...)
 * is wrapped with prefix and suffix unconditionally.
 *
 * @param string $main_prefix Prefix string
 * @param string $content HTML content
 * @param array $alternatives Alternative prefixes for certain tags: key=>value pairs where the keys are the tag names in uppercase. The key "style" (as read by this method) overrides the prefix for <style> blocks.
 * @param string $suffix Suffix string (put after the resource).
 * @return string Processed HTML content
 */
public function prefixResourcePath($main_prefix, $content, $alternatives = [], $suffix = '')
{
    // Which attribute carries the resource path, per (lower-case) tag name.
    $pathAttributes = [
        'td' => 'background',
        'body' => 'background',
        'table' => 'background',
        'img' => 'src',
        'input' => 'src',
        'script' => 'src',
        'embed' => 'src',
        'link' => 'href',
        'a' => 'href',
        'form' => 'action',
    ];
    $parts = $this->splitTags('embed,td,table,body,img,input,form,link,script,a,param', $content);
    foreach ($parts as $k => $v) {
        // Only the odd-numbered parts are treated as tags.
        if (!($k % 2)) {
            continue;
        }
        $params = $this->get_tag_attributes($v);
        // Detect tag-ending so that it is re-applied correctly.
        $tagEnd = substr($v, -2) === '/>' ? ' />' : '>';
        // The 'name' of the first tag
        $firstTagName = $this->getFirstTagName($v);
        $tagName = strtolower($firstTagName);
        $prefix = $alternatives[strtoupper($firstTagName)] ?? $main_prefix;

        $attribute = null;
        if (isset($pathAttributes[$tagName])) {
            $attribute = $pathAttributes[$tagName];
        } elseif ($tagName === 'param' && ($params[0]['name'] ?? '') === 'movie') {
            $attribute = 'value';
        }
        // empty() keeps the original truthiness test without reading a missing key.
        if ($attribute === null || empty($params[0][$attribute])) {
            continue;
        }

        $params[0][$attribute] = $this->prefixRelPath($prefix, $params[0][$attribute], $suffix);
        $attributes = $this->compileTagAttribs($params[0], $params[1] ?? []);
        $parts[$k] = '<' . trim($tagName . ' ' . $attributes) . $tagEnd;
    }
    $content = implode('', $parts);
    // Fix <style> section:
    $prefix = $alternatives['style'] ?? $main_prefix;
    if ((string)$prefix !== '') {
        $parts = $this->splitIntoBlock('style', $content);
        foreach ($parts as $k => &$part) {
            if ($k % 2) {
                // A callback is used so that prefix/suffix are inserted literally: in a
                // replacement string a prefix such as "2015/" would merge with "\1" into
                // the backreference "\12", and "$1"/"\1" inside the prefix would be expanded.
                $replaced = preg_replace_callback(
                    '/(url[[:space:]]*\\([[:space:]]*["\']?)([^"\')]*)(["\']?[[:space:]]*\\))/i',
                    function (array $match) use ($prefix, $suffix) {
                        return $match[1] . $prefix . $match[2] . $suffix . $match[3];
                    },
                    $part
                );
                // preg_replace_callback() returns null on a PCRE error; keep the block then.
                if ($replaced !== null) {
                    $part = $replaced;
                }
            }
        }
        unset($part);
        $content = implode('', $parts);
    }
    return $content;
}
|
775
|
|
|
|
|
776
|
|
|
/**
 * Internal sub-function for ->prefixResourcePath()
 *
 * Values starting with "/" or "#", and values for which parse_url() reports a scheme,
 * are returned unchanged. Everything else is wrapped as prefix + value + suffix.
 *
 * @param string $prefix Prefix string
 * @param string $srcVal Relative path/URL
 * @param string $suffix Suffix string
 * @return string Output path, prefixed if no scheme in input string
 * @access private
 */
public function prefixRelPath($prefix, $srcVal, $suffix = '')
{
    // "??" avoids an uninitialized-offset error for an empty string.
    $firstChar = $srcVal[0] ?? '';
    // Only prefix if it's not an absolute path or
    // only a link to a section within the page.
    if ($firstChar === '/' || $firstChar === '#') {
        return $srcVal;
    }
    $urlParts = parse_url($srcVal);
    // Only prefix URLs without a scheme. empty() also covers a missing "scheme" key
    // and the false that parse_url() returns for seriously malformed input.
    if (empty($urlParts['scheme'])) {
        return $prefix . $srcVal . $suffix;
    }
    return $srcVal;
}
|
798
|
|
|
|
|
799
|
|
|
/**
 * Internal function for case shifting of a string or whole array
 *
 * @param mixed $str Input string/array
 * @param bool $caseSensitiveComparison If TRUE the input is returned unchanged, if FALSE it is returned in uppercase
 * @param string $cacheKey Key string used for internal caching of array results. Could be an MD5 hash of the serialized version of the input $str if that is an array.
 * @return string|array Output string (or array of strings), processed
 * @access private
 */
public function caseShift($str, $caseSensitiveComparison, $cacheKey = '')
{
    if ($caseSensitiveComparison) {
        return $str;
    }
    if (!is_array($str)) {
        return strtoupper($str);
    }
    // Fetch from runlevel cache
    if ($cacheKey && isset($this->caseShift_cache[$cacheKey])) {
        return $this->caseShift_cache[$cacheKey];
    }
    foreach ($str as $index => $item) {
        $str[$index] = strtoupper($item);
    }
    if ($cacheKey) {
        $this->caseShift_cache[$cacheKey] = $str;
    }
    return $str;
}
|
830
|
|
|
|
|
831
|
|
|
/**
 * Compiling an array with tag attributes into a string
 *
 * An attribute is written as name=value when its value is not an empty string or when
 * the meta information carries a "dashType" entry for it. Otherwise only the name is written.
 *
 * @param array $tagAttrib Tag attributes
 * @param array $meta Meta information about these attributes, per attribute name: "origTag" (name to output instead of the array key) and "dashType" (quote character to wrap the value in)
 * @return string Imploded attributes, eg: 'attribute="value" attrib2="value2"'
 * @access private
 */
public function compileTagAttribs($tagAttrib, $meta = [])
{
    $accu = [];
    foreach ($tagAttrib as $k => $v) {
        // !empty() mirrors the former "?:" fallback but does not read missing keys,
        // which happens for every attribute when $meta is left at its default.
        $attr = !empty($meta[$k]['origTag']) ? $meta[$k]['origTag'] : $k;
        if ((string)$v !== '' || isset($meta[$k]['dashType'])) {
            if (!empty($meta[$k]['dashType'])) {
                $dash = $meta[$k]['dashType'];
            } else {
                // No quote character recorded: integers stay bare, everything else is double-quoted.
                $dash = MathUtility::canBeInterpretedAsInteger($v) ? '' : '"';
            }
            $attr .= '=' . $dash . $v . $dash;
        }
        $accu[] = $attr;
    }
    return implode(' ', $accu);
}
|
852
|
|
|
|
|
853
|
|
|
/**
 * Converts TSconfig into an array for the HTMLcleaner function.
 *
 * Keys read from $TSconfig: allowTags, tags., localNesting, globalNesting,
 * rmTagIfNoAttrib, noAttrib, removeTags, stripEmptyTags, stripEmptyTags.,
 * keepNonMatchedTags and htmlSpecialChars. All of them are optional.
 *
 * @param array $TSconfig TSconfig for HTMLcleaner
 * @param array $keepTags Tag configuration to start from; it is merged over the tags listed in "allowTags"
 * @return array Four entries: the tag configuration, the "keepNonMatchedTags" value as string, the "htmlSpecialChars" value as int, and additional configuration (stripEmptyTags settings)
 * @access private
 */
public function HTMLparserConfig($TSconfig, $keepTags = [])
{
    // Allow tags (base list, merged with incoming array)
    $alTags = array_flip(GeneralUtility::trimExplode(',', strtolower($TSconfig['allowTags'] ?? ''), true));
    $keepTags = array_merge($alTags, $keepTags);
    // Set config properties.
    if (is_array($TSconfig['tags.'] ?? null)) {
        // First pass: scalar entries switch a tag off ("0") or on ("1").
        foreach ($TSconfig['tags.'] as $key => $tagC) {
            if (is_array($tagC) || (string)$key !== strtolower((string)$key)) {
                continue;
            }
            if ((string)$tagC === '0') {
                unset($keepTags[$key]);
            }
            if ((string)$tagC === '1' && !isset($keepTags[$key])) {
                $keepTags[$key] = 1;
            }
        }
        // Second pass: array entries ("tagname.") carry the per-tag configuration.
        foreach ($TSconfig['tags.'] as $key => $tagC) {
            if (!is_array($tagC) || (string)$key !== strtolower((string)$key)) {
                continue;
            }
            // Strip the trailing dot of the TypoScript key.
            $key = substr($key, 0, -1);
            if (!is_array($keepTags[$key] ?? null)) {
                $keepTags[$key] = [];
            }
            if (is_array($tagC['fixAttrib.'] ?? null)) {
                foreach ($tagC['fixAttrib.'] as $atName => $atConfig) {
                    if (!is_array($atConfig)) {
                        continue;
                    }
                    $atName = substr($atName, 0, -1);
                    if (!is_array($keepTags[$key]['fixAttrib'][$atName] ?? null)) {
                        $keepTags[$key]['fixAttrib'][$atName] = [];
                    }
                    $keepTags[$key]['fixAttrib'][$atName] = array_merge($keepTags[$key]['fixAttrib'][$atName], $atConfig);
                    // "range" and "list" are comma lists in TSconfig and are stored exploded.
                    // A value that is already an array (e.g. from the incoming $keepTags) is kept as is.
                    foreach (['range', 'list'] as $listProperty) {
                        $listValue = $keepTags[$key]['fixAttrib'][$atName][$listProperty] ?? '';
                        if (!is_array($listValue) && (string)$listValue !== '') {
                            $keepTags[$key]['fixAttrib'][$atName][$listProperty] = GeneralUtility::trimExplode(',', (string)$listValue);
                        }
                    }
                }
            }
            // fixAttrib has been merged above and must not overwrite that result below.
            unset($tagC['fixAttrib.'], $tagC['fixAttrib']);
            if (!empty($tagC['rmTagIfNoAttrib']) && empty($tagC['nesting'])) {
                $tagC['nesting'] = 1;
            }
            $keepTags[$key] = array_merge($keepTags[$key], $tagC);
        }
    }
    // Comma lists of tag names that set one property on tags which are already kept.
    // Order matters: a later option overrides what an earlier one has set.
    $listOptions = [
        'localNesting' => ['nesting', 1],
        'globalNesting' => ['nesting', 'global'],
        'rmTagIfNoAttrib' => ['rmTagIfNoAttrib', 1],
        'noAttrib' => ['allowedAttribs', 0],
    ];
    foreach ($listOptions as $option => $assignment) {
        if (empty($TSconfig[$option])) {
            continue;
        }
        foreach (GeneralUtility::trimExplode(',', strtolower($TSconfig[$option]), true) as $tn) {
            if (!isset($keepTags[$tn])) {
                continue;
            }
            if (!is_array($keepTags[$tn])) {
                $keepTags[$tn] = [];
            }
            $keepTags[$tn][$assignment[0]] = $assignment[1];
            if ($option === 'rmTagIfNoAttrib' && empty($keepTags[$tn]['nesting'])) {
                $keepTags[$tn]['nesting'] = 1;
            }
        }
    }
    // removeTags replaces any existing configuration of the listed tags.
    if (!empty($TSconfig['removeTags'])) {
        foreach (GeneralUtility::trimExplode(',', strtolower($TSconfig['removeTags']), true) as $tn) {
            $keepTags[$tn] = [
                'allowedAttribs' => 0,
                'rmTagIfNoAttrib' => 1,
            ];
        }
    }
    // Create additional configuration:
    $addConfig = [];
    if (isset($TSconfig['stripEmptyTags'])) {
        $addConfig['stripEmptyTags'] = $TSconfig['stripEmptyTags'];
        if (isset($TSconfig['stripEmptyTags.'])) {
            $addConfig['stripEmptyTags.'] = $TSconfig['stripEmptyTags.'];
        }
    }
    return [
        $keepTags,
        (string)($TSconfig['keepNonMatchedTags'] ?? ''),
        (int)($TSconfig['htmlSpecialChars'] ?? 0),
        $addConfig
    ];
}
|
981
|
|
|
|
|
982
|
|
|
/**
 * Strips empty tags from HTML.
 *
 * A tag counts as empty when nothing but spaces (and, optionally, non-breaking spaces)
 * stands between its opening and closing tag. Stripping is repeated until nothing
 * changes, so tags that become empty by removing their children are stripped as well.
 *
 * @param string $content The content to be stripped of empty tags
 * @param string $tagList The comma separated list of tags to be stripped.
 *                        If empty, all empty tags will be stripped
 * @param bool $treatNonBreakingSpaceAsEmpty If TRUE tags containing only &nbsp; entities (or raw UTF-8 non-breaking spaces) will be treated as empty.
 * @param bool $keepTags If true, the provided tags will be kept instead of stripped.
 * @return string the stripped content
 */
public function stripEmptyTags($content, $tagList = '', $treatNonBreakingSpaceAsEmpty = false, $keepTags = false)
{
    // The tag name has to end where the pattern says it ends. Without this assertion
    // "<br></b>" is matched as an empty <b>, and the list entry "b" also hits <body>.
    $tagBoundary = '(?=[\s\/>])';
    $tagNames = [];
    if (!empty($tagList)) {
        foreach (GeneralUtility::trimExplode(',', $tagList, true) as $tagName) {
            $tagNames[] = preg_quote($tagName, '/');
        }
    }
    if ($tagNames === []) {
        // No usable list: all characters until you reach a > or space
        $tagRegEx = '[^ >]+';
    } elseif ($keepTags) {
        // Any tag name except the complete names on the list
        $tagRegEx = '(?!(?:' . implode('|', $tagNames) . ')' . $tagBoundary . ')[^ >]+';
    } else {
        $tagRegEx = implode('|', $tagNames);
    }
    // \xC2\xA0 is the UTF-8 byte sequence of U+00A0, resolved by PCRE (the pattern has no /u flag).
    $emptyContent = $treatNonBreakingSpaceAsEmpty ? '(?: |&nbsp;|\xC2\xA0)' : ' ';
    $finalRegex = '/<(' . $tagRegEx . ')' . $tagBoundary . '[^>]*>'
        . $emptyContent . '*'
        . '<\/\1' . $tagBoundary . '[^>]*>/i';
    do {
        $stripped = preg_replace($finalRegex, '', $content, -1, $count);
        if ($stripped === null) {
            // PCRE error (e.g. backtrack limit): keep what has been achieved so far
            // instead of returning null.
            break;
        }
        $content = $stripped;
    } while ($count !== 0);
    return $content;
}
|
1010
|
|
|
|
|
1011
|
|
|
/**
 * Strips the configured empty tags from the HTML code.
 *
 * Nothing happens unless "stripEmptyTags" is set in the configuration. The options
 * are read from "stripEmptyTags.": "keepTags" takes precedence over "tags", and
 * "treatNonBreakingSpaceAsEmpty" is passed on as a boolean.
 *
 * @param string $value
 * @param array $configuration
 * @return string
 */
protected function stripEmptyTagsIfConfigured($value, $configuration)
{
    if (empty($configuration['stripEmptyTags'])) {
        return $value;
    }

    $tagList = null;
    $isKeepList = !empty($configuration['stripEmptyTags.']['keepTags']);
    if ($isKeepList) {
        $tagList = $configuration['stripEmptyTags.']['keepTags'];
    } elseif (!empty($configuration['stripEmptyTags.']['tags'])) {
        $tagList = $configuration['stripEmptyTags.']['tags'];
    }

    return $this->stripEmptyTags(
        $value,
        $tagList,
        !empty($configuration['stripEmptyTags.']['treatNonBreakingSpaceAsEmpty']),
        $isKeepList
    );
}
|
1037
|
|
|
} |
|
1038
|
|
|
|