1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
/* |
4
|
|
|
* This file is part of the TYPO3 CMS project. |
5
|
|
|
* |
6
|
|
|
* It is free software; you can redistribute it and/or modify it under |
7
|
|
|
* the terms of the GNU General Public License, either version 2 |
8
|
|
|
* of the License, or any later version. |
9
|
|
|
* |
10
|
|
|
* For the full copyright and license information, please read the |
11
|
|
|
* LICENSE.txt file that was distributed with this source code. |
12
|
|
|
* |
13
|
|
|
* The TYPO3 project - inspiring people to share! |
14
|
|
|
*/ |
15
|
|
|
|
16
|
|
|
namespace TYPO3\CMS\Core\Html; |
17
|
|
|
|
18
|
|
|
use TYPO3\CMS\Core\Utility\GeneralUtility; |
19
|
|
|
use TYPO3\CMS\Core\Utility\MathUtility; |
20
|
|
|
use TYPO3\CMS\Frontend\ContentObject\ContentObjectRenderer; |
21
|
|
|
|
22
|
|
|
/** |
23
|
|
|
* Functions for parsing HTML. |
24
|
|
|
* You are encouraged to use this class in your own applications |
25
|
|
|
*/ |
26
|
|
|
class HtmlParser |
27
|
|
|
{ |
28
|
|
|
/** |
29
|
|
|
* @var array |
30
|
|
|
*/ |
31
|
|
|
protected $caseShift_cache = []; |
32
|
|
|
|
33
|
|
|
// Void elements that do not have closing tags, as defined by HTML5, except link element |
34
|
|
|
const VOID_ELEMENTS = 'area|base|br|col|command|embed|hr|img|input|keygen|meta|param|source|track|wbr'; |
35
|
|
|
|
36
|
|
|
/************************************ |
37
|
|
|
* |
38
|
|
|
* Parsing HTML code |
39
|
|
|
* |
40
|
|
|
************************************/ |
41
|
|
|
/**
 * Splits $content into an alternating list of "outside" and "block" segments.
 *
 * A block starts at an opening tag whose name is in $tag and ends at the end tag that
 * brings the nesting depth back to zero. Even indexes of the result hold the content
 * outside of blocks, odd indexes hold a complete block including its outermost tags.
 * Use ->removeFirstAndLastTag() to strip the outer tags of a block if needed.
 *
 * @param string $tag List of tags, comma separated.
 * @param string $content HTML-content
 * @param bool $eliminateExtraEndTags If set, end tags without a matching start tag are dropped - you should probably set this in most cases.
 * @return array Even indexes are outside the blocks, odd indexes are block-content.
 * @see splitTags()
 * @see removeFirstAndLastTag()
 */
public function splitIntoBlock($tag, $content, $eliminateExtraEndTags = false)
{
    $quotedNames = array_map(
        static function ($name) {
            return preg_quote($name, '/');
        },
        array_unique(GeneralUtility::trimExplode(',', $tag, true))
    );
    $pattern = '/\\<\\/?(' . implode('|', $quotedNames) . ')(\\s*\\>|\\s[^\\>]*\\>)/si';
    $segments = preg_split($pattern, $content);
    if (empty($segments)) {
        return [];
    }
    // Everything before the first matched tag seeds the first "outside" entry
    $current = array_shift($segments);
    $offset = strlen($current);
    $depth = 0;
    $result = [];
    // Each remaining segment is the text following one matched tag; the tag itself
    // is re-read from $content at $offset because preg_split() discarded it.
    foreach ($segments as $segment) {
        $closing = substr($content, $offset, 2) === '</';
        $tagLength = strcspn(substr($content, $offset), '>') + 1;
        if (!$closing) {
            if ($depth === 0) {
                // Leaving ground level: flush the "outside" buffer
                $result[] = $current;
                $current = '';
            }
            $depth++;
            // Tag plus the text following it go into the buffer in one step
            $chunk = substr($content, $offset, strlen($segment) + $tagLength);
            $offset += strlen($chunk);
            $current .= $chunk;
            continue;
        }
        $depth--;
        $dropped = $eliminateExtraEndTags && $depth < 0;
        if ($dropped) {
            // Surplus end tag: ignore it and stay on ground level
            $depth = 0;
        } else {
            $current .= substr($content, $offset, $tagLength);
        }
        $offset += $tagLength;
        if ($depth === 0 && !$dropped) {
            // Back on ground level by a regular end tag: the block is complete
            $result[] = $current;
            $current = '';
        }
        $chunk = substr($content, $offset, strlen($segment));
        $offset += strlen($chunk);
        $current .= $chunk;
    }
    $result[] = $current;
    return $result;
}
115
|
|
|
|
116
|
|
|
/**
 * Splits content into blocks *recursively* and runs call back methods on tags and content.
 *
 * Content outside of blocks is passed to $callBackContent. For every block an array with the
 * keys "tag_start", "tag_end", "tag_name" and "content" (the recursively processed inner
 * content) is passed to $callBackTags, and the block is rebuilt from the returned array.
 *
 * @param string $tag Tag list, see splitIntoBlock()
 * @param string $content Content, see splitIntoBlock()
 * @param object $procObj Object where call back methods are.
 * @param string $callBackContent Name of call back method for content; "function callBackContent($str,$level)"
 * @param string $callBackTags Name of call back method for tags; "function callBackTags($tags,$level)"
 * @param int $level Indent level
 * @return string Processed content
 * @see splitIntoBlock()
 */
public function splitIntoBlockRecursiveProc($tag, $content, &$procObj, $callBackContent, $callBackTags, $level = 0)
{
    $output = '';
    foreach ($this->splitIntoBlock($tag, $content, true) as $index => $piece) {
        if ($index % 2 === 0) {
            // Even index: content outside of any block
            $output .= $callBackContent ? $procObj->{$callBackContent}($piece, $level) : $piece;
            continue;
        }
        // Odd index: a complete block, processed depth-first
        $originalName = $this->getFirstTagName($piece, true);
        $block = [
            'tag_start' => $this->getFirstTag($piece),
            'tag_end' => '</' . $originalName . '>',
            'tag_name' => strtolower($originalName),
            'content' => $this->splitIntoBlockRecursiveProc(
                $tag,
                $this->removeFirstAndLastTag($piece),
                $procObj,
                $callBackContent,
                $callBackTags,
                $level + 1
            ),
        ];
        if ($callBackTags) {
            $block = $procObj->{$callBackTags}($block, $level);
        }
        $output .= $block['tag_start'] . $block['content'] . $block['tag_end'];
    }
    return $output;
}
151
|
|
|
|
152
|
|
|
/**
 * Splits $content at every opening (or self-closed) tag whose name is in the list $tag.
 *
 * Even indexes of the result hold the content between the matched tags, odd indexes hold
 * the matched tag itself (from "<" up to and including the next ">"). End tags are not
 * matched and therefore stay part of the content entries.
 *
 * @param string $tag List of tags, comma separated.
 * @param string $content HTML-content
 * @return array Even indexes are content outside the tags, odd indexes are the tags.
 * @see splitIntoBlock()
 * @see removeFirstAndLastTag()
 */
public function splitTags($tag, $content)
{
    $quotedNames = array_map(
        static function ($name) {
            return preg_quote($name, '/');
        },
        GeneralUtility::trimExplode(',', $tag, true)
    );
    $pattern = '/\\<(' . implode('|', $quotedNames) . ')(\\s[^>]*)?\\/?>/si';
    $fragments = preg_split($pattern, $content);
    if (empty($fragments)) {
        return [];
    }
    // Text in front of the first matched tag
    $leading = array_shift($fragments);
    $result = [$leading];
    $offset = strlen($leading);
    foreach ($fragments as $text) {
        // preg_split() dropped the tag, so it is cut out of $content again
        $tagLength = strcspn(substr($content, $offset), '>') + 1;
        $markup = substr($content, $offset, $tagLength);
        $result[] = $markup;
        $offset += strlen($markup);
        $result[] = $text;
        $offset += strlen($text);
    }
    return $result;
}
193
|
|
|
|
194
|
|
|
/**
 * Removes the first and last tag in the string.
 * Anything before the first and after the last tag respectively is also removed.
 *
 * The "first tag" ends at the first ">" and the "last tag" starts at the last "<".
 * If the string does not contain such a pair in that order there is nothing enclosed
 * by tags, and an empty string is returned.
 *
 * @param string $str String to process
 * @return string Content between the first and the last tag, or an empty string
 */
public function removeFirstAndLastTag($str)
{
    $str = (string)$str;
    // End of first tag:
    $start = strpos($str, '>');
    // Begin of last tag:
    $end = strrpos($str, '<');
    // strpos()/strrpos() return false when nothing is found; using that as offset 0
    // would cut arbitrary characters off the input instead of removing tags.
    if ($start === false || $end === false || $end < $start) {
        return '';
    }
    return substr($str, $start + 1, $end - $start - 1);
}
210
|
|
|
|
211
|
|
|
/**
 * Returns the first tag in $str.
 * Everything from the beginning of $str up to and including the first ">" is returned,
 * so make sure the tag is the first thing in the string.
 *
 * @param string $str HTML string with tags
 * @return string The leading part of $str ending with the first ">", or an empty string if there is no ">"
 */
public function getFirstTag($str)
{
    $closingBracket = strpos($str, '>');
    if ($closingBracket === false) {
        return '';
    }
    return substr($str, 0, $closingBracket + 1);
}
224
|
|
|
|
225
|
|
|
/**
 * Returns the NAME of the first tag in $str.
 *
 * The tag has to be the first thing in $str (leading whitespace is allowed) and its name
 * must be followed by a whitespace character or by ">".
 *
 * @param string $str HTML tag
 * @param bool $preserveCase If set, the tag name is NOT converted to uppercase but its case is preserved.
 * @return string Tag name, in upper case unless $preserveCase is set; empty string if no tag was found
 * @see getFirstTag()
 */
public function getFirstTagName($str, $preserveCase = false)
{
    $found = [];
    if (preg_match('/^\\s*\\<([^\\s\\>]+)(\\s|\\>)/', $str, $found) !== 1) {
        return '';
    }
    return $preserveCase ? $found[1] : strtoupper($found[1]);
}
244
|
|
|
|
245
|
|
|
/**
 * Returns the attributes of a tag together with meta data about each attribute.
 *
 * The first returned array maps the lower-cased attribute name to its value. A shorthand
 * attribute (no value) has an empty string as value; check for existence with isset().
 * The second array holds per attribute the original spelling of the name ("origTag") and,
 * if a value was given, the quotation character that enclosed it ("dashType").
 *
 * @param string $tag Tag: $tag is either a whole tag (eg '<TAG OPTION ATTRIB=VALUE>') or the parameterlist (ex ' OPTION ATTRIB=VALUE>')
 * @param bool $deHSC If set, the attribute values are de-htmlspecialchar'ed. Should actually always be set!
 * @return array array(Tag attributes,Attribute meta-data)
 */
public function get_tag_attributes($tag, $deHSC = false)
{
    [$components, $quoteChars] = $this->split_tag_attributes($tag);
    if (!is_array($components)) {
        return [null, null];
    }
    $attributes = [];
    $attributesMeta = [];
    // Name of the attribute that is currently waiting for a value
    $name = '';
    // TRUE directly after an "=" component: the next component is a value
    $expectValue = false;
    foreach ($components as $index => $component) {
        if ($component === '=') {
            $expectValue = true;
            continue;
        }
        if ($expectValue) {
            // A value is only assigned if there is an attribute waiting for it
            if ($name) {
                $attributes[$name] = $deHSC ? htmlspecialchars_decode($component) : $component;
                $attributesMeta[$name]['dashType'] = $quoteChars[$index];
                $name = '';
            }
        } else {
            $namekey = preg_replace('/[^[:alnum:]_\\:\\-]/', '', $component) ?? '';
            if ($namekey) {
                $name = strtolower((string)$namekey);
                $attributesMeta[$name] = ['origTag' => $namekey];
                $attributes[$name] = '';
            }
        }
        $expectValue = false;
    }
    return [$attributes, $attributesMeta];
}
292
|
|
|
|
293
|
|
|
/**
 * Returns the "components" of an attribute list together with their quotation.
 * The result is normally analyzed by get_tag_attributes().
 * A leading tag name ("<name ") and a trailing ">" are removed if found.
 *
 * Components are attribute names, "=" signs and attribute values in the order they occur.
 * The second returned array has the same keys and tells for each component which
 * character (" or ') enclosed it; it is an empty string for unquoted components.
 *
 * @param string $tag The tag or attributes
 * @return array array(components, quotation character per component)
 * @internal
 * @see \TYPO3\CMS\Core\Utility\GeneralUtility::split_tag_attributes()
 */
public function split_tag_attributes($tag)
{
    $tagMatch = [];
    if (preg_match('/(\\<[^\\s]+\\s+)?(.*?)\\s*(\\>)?$/s', $tag, $tagMatch) !== 1) {
        return [[], []];
    }
    $attributeString = $tagMatch[2];
    $tokens = [];
    if (!preg_match_all('/("[^"]*"|\'[^\']*\'|[^\\s"\'\\=]+|\\=)/s', $attributeString, $tokens)) {
        return [[], []];
    }
    $components = [];
    $quoteChars = [];
    foreach ($tokens[1] as $token) {
        $isQuoted = in_array($token[0], ['"', '\''], true);
        // Quoted values are stored without their enclosing quotes
        $quoteChars[] = $isQuoted ? $token[0] : '';
        $components[] = $isQuoted ? substr($token, 1, -1) : $token;
    }
    return [$components, $quoteChars];
}
331
|
|
|
|
332
|
|
|
/********************************* |
333
|
|
|
* |
334
|
|
|
* Clean HTML code |
335
|
|
|
* |
336
|
|
|
*********************************/ |
337
|
|
|
/**
 * Function that can clean up HTML content according to configuration given in the $tags array.
 *
 * Initializing the $tags array to allow a list of tags (in this case <B>,<I>,<U> and <A>), set it like this: $tags = array_flip(explode(',','b,a,i,u'))
 * If the value of the $tags[$tagname] entry is an array, advanced processing of the tags is initialized. These are the options:
 *
 * $tags[$tagname] = Array(
 *     'overrideAttribs' => ''        If set, this string is preset as the attributes of the tag
 *     'allowedAttribs' => '0' (zero) = no attributes allowed, '[commalist of attributes]' = only allowed attributes. If blank, all attributes are allowed.
 *     'fixAttrib' => Array(
 *         '[attribute name]' => Array (
 *             'set' => Force the attribute value to this value.
 *             'unset' => Boolean: If set, the attribute is unset.
 *             'default' => If no attribute exists by this name, this value is set as default value (if this value is not blank)
 *             'always' => Boolean. If set, the attribute is always processed. Normally an attribute is processed only if it exists
 *             'trim,intval,lower,upper' => All booleans. If any of these keys are set, the value is passed through the respective PHP-functions.
 *             'range' => Array ('[low limit]','[high limit, optional]') Setting integer range.
 *             'list' => Array ('[value1/default]','[value2]','[value3]') Attribute must be in this list. If not, the value is set to the first element.
 *             'removeIfFalse' => Boolean/'blank'. If set, then the attribute is removed if it is 'FALSE'. If this value is set to 'blank' then the value must be a blank string (that means a 'zero' value will not be removed)
 *             'removeIfEquals' => [value] If the attribute value matches the value set here, then it is removed.
 *             'casesensitiveComp' => 1 If set, then the removeIfEquals and list comparisons will be case sensitive. Otherwise not.
 *             'prefixLocalAnchors' => If set, a value starting with "#" is prefixed with the current URL (2 = URL to current location from the content object renderer, otherwise the request URL).
 *             'prefixRelPathWith' => If set, this string is prepended to values that have no scheme and do not start with "/".
 *             'userFunc' => User function the value is passed through; 'userFunc.' holds its parameters.
 *         )
 *     ),
 *     'protect' => '',    Boolean. If set, the tag <> is converted to &lt; and &gt;
 *     'remap' => '',      String. If set, the tagname is remapped to this tagname
 *     'rmTagIfNoAttrib' => '',    Boolean. If set, then the tag is removed if no attributes happened to be there.
 *     'nesting' => '',    Boolean/'global'. If set TRUE, then this tag must have starting and ending tags in the correct order. Any tags not in this order will be discarded. Thus '</B><B><I></B></I></B>' will be converted to '<B><I></B></I>'. Is the value 'global' then true nesting in relation to other tags marked for 'global' nesting control is preserved. This means that if <B> and <I> are set for global nesting then this string '</B><B><I></B></I></B>' is converted to '<B></B>'
 * )
 *
 * @param string $content Is the HTML-content being processed. This is also the result being returned.
 * @param array $tags Is an array where each key is a tagname in lowercase. Only tags present as keys in this array are preserved. The value of the key can be an array with a vast number of options to configure.
 * @param mixed $keepAll Boolean/'protect', if set, then all tags are kept regardless of tags present as keys in $tags-array. If 'protect' then the preserved tags have their <> converted to &lt; and &gt;
 * @param int $hSC Values -1,0,1,2: Set to zero= disabled, set to 1 then the content BETWEEN tags is htmlspecialchar()'ed, set to -1 its the opposite and set to 2 the content will be HSC'ed BUT with preservation for real entities (eg. "&amp;" or "&#234;")
 * @param array $addConfig Configuration array send along as $conf to the internal functions
 * @return string Processed HTML content
 */
public function HTMLcleaner($content, $tags = [], $keepAll = 0, $hSC = 0, $addConfig = [])
{
    $newContent = [];
    $tokArr = explode('<', $content);
    $newContent[] = $this->bidir_htmlspecialchars(current($tokArr), $hSC);
    // The first element (text before the first "<") was handled above and is skipped in the loop
    $tokArrSliced = array_slice($tokArr, 1, null, true);
    // Next free position in $newContent; positions of opening tags are remembered for nesting control
    $c = 1;
    $tagRegister = [];
    $tagStack = [];
    $inComment = false;
    $inCdata = false;
    $skipTag = false;
    foreach ($tokArrSliced as $tok) {
        if ($inComment) {
            if (($eocPos = strpos($tok, '-->')) === false) {
                // End of comment is not found in the token. Go further until end of comment is found in other tokens.
                $newContent[$c++] = '<' . $tok;
                continue;
            }
            // Comment ends in the middle of the token: add comment and proceed with rest of the token
            $newContent[$c++] = '<' . substr($tok, 0, $eocPos + 3);
            $tok = substr($tok, $eocPos + 3);
            $inComment = false;
            $skipTag = true;
        } elseif ($inCdata) {
            if (($eocPos = strpos($tok, '/*]]>*/')) === false) {
                // End of CDATA is not found in the token. Go further until end of CDATA is found in other tokens.
                $newContent[$c++] = '<' . $tok;
                continue;
            }
            // CDATA ends in the middle of the token: add CDATA and proceed with rest of the token
            $newContent[$c++] = '<' . substr($tok, 0, $eocPos + 10);
            $tok = substr($tok, $eocPos + 10);
            $inCdata = false;
            $skipTag = true;
        } elseif (strpos($tok, '!--') === 0) {
            if (($eocPos = strpos($tok, '-->')) === false) {
                // Comment started in this token but it does not end in the same token. Set a flag to skip till the end of comment
                $newContent[$c++] = '<' . $tok;
                $inComment = true;
                continue;
            }
            // Start and end of comment are both in the current token. Add comment and proceed with rest of the token
            $newContent[$c++] = '<' . substr($tok, 0, $eocPos + 3);
            $tok = substr($tok, $eocPos + 3);
            $skipTag = true;
        } elseif (strpos($tok, '![CDATA[*/') === 0) {
            if (($eocPos = strpos($tok, '/*]]>*/')) === false) {
                // CDATA started in this token but it does not end in the same token. Set a flag to skip till the end of CDATA
                $newContent[$c++] = '<' . $tok;
                $inCdata = true;
                continue;
            }
            // Start and end of CDATA are both in the current token. Add CDATA and proceed with rest of the token
            $newContent[$c++] = '<' . substr($tok, 0, $eocPos + 10);
            $tok = substr($tok, $eocPos + 10);
            $skipTag = true;
        }
        $firstChar = $tok[0] ?? null;
        // It is a tag... (first char is a-z0-9 or /). This also avoids triggering on <?xml..> and <!DOCTYPE..>
        if (!$skipTag && preg_match('/[[:alnum:]\\/]/', (string)$firstChar) === 1) {
            $tagEnd = strpos($tok, '>');
            // If there is an end-bracket... tagEnd can't be 0 as the first character can't be a >
            if ($tagEnd) {
                $endTag = $firstChar === '/' ? 1 : 0;
                $tagContent = substr($tok, $endTag, $tagEnd - $endTag);
                $tagParts = preg_split('/\\s+/s', $tagContent, 2);
                $tagName = strtolower($tagParts[0]);
                $emptyTag = 0;
                if (isset($tags[$tagName])) {
                    // If there is processing to do for the tag:
                    if (is_array($tags[$tagName])) {
                        $tagConf = $tags[$tagName];
                        // No stray whitespace inside the group: otherwise the last void element ("wbr") never matches
                        if (preg_match('/^(' . self::VOID_ELEMENTS . ')$/i', $tagName)) {
                            $emptyTag = 1;
                        }
                        // If NOT an endtag, do attribute processing
                        if (!$endTag) {
                            // Override attributes
                            if (isset($tagConf['overrideAttribs']) && (string)$tagConf['overrideAttribs'] !== '') {
                                $tagParts[1] = $tagConf['overrideAttribs'];
                            }
                            // Allowed attributes
                            if (isset($tagConf['allowedAttribs']) && (string)$tagConf['allowedAttribs'] !== '') {
                                // No attribs allowed
                                if ((string)$tagConf['allowedAttribs'] === '0') {
                                    $tagParts[1] = '';
                                } elseif (isset($tagParts[1]) && trim($tagParts[1])) {
                                    $tagAttrib = $this->get_tag_attributes($tagParts[1]);
                                    $tagParts[1] = '';
                                    $newTagAttrib = [];
                                    $tList = (array)(
                                        $tagConf['_allowedAttribs']
                                        ?? GeneralUtility::trimExplode(',', strtolower($tagConf['allowedAttribs']), true)
                                    );
                                    foreach ($tList as $allowTag) {
                                        if (isset($tagAttrib[0][$allowTag])) {
                                            $newTagAttrib[$allowTag] = $tagAttrib[0][$allowTag];
                                        }
                                    }

                                    $tagParts[1] = $this->compileTagAttribs($newTagAttrib, $tagAttrib[1]);
                                }
                            }
                            // Fixed attrib values
                            if (isset($tagConf['fixAttrib']) && is_array($tagConf['fixAttrib'])) {
                                // A tag without any attributes has no second part
                                $tagAttrib = $this->get_tag_attributes($tagParts[1] ?? '');
                                $tagParts[1] = '';
                                foreach ($tagConf['fixAttrib'] as $attr => $params) {
                                    // All keys of $params are optional, the case sensitivity flag is passed on as configured
                                    $caseSensitive = $params['casesensitiveComp'] ?? null;
                                    if (isset($params['set']) && $params['set'] !== '') {
                                        $tagAttrib[0][$attr] = $params['set'];
                                    }
                                    if (!empty($params['unset'])) {
                                        unset($tagAttrib[0][$attr]);
                                    }
                                    if (!empty($params['default']) && !isset($tagAttrib[0][$attr])) {
                                        $tagAttrib[0][$attr] = $params['default'];
                                    }
                                    if (!empty($params['always']) || isset($tagAttrib[0][$attr])) {
                                        // With "always" the attribute may be missing here, hence the fallbacks below
                                        if (!empty($params['trim'])) {
                                            $tagAttrib[0][$attr] = trim((string)($tagAttrib[0][$attr] ?? ''));
                                        }
                                        if (!empty($params['intval'])) {
                                            $tagAttrib[0][$attr] = (int)($tagAttrib[0][$attr] ?? 0);
                                        }
                                        if (!empty($params['lower'])) {
                                            $tagAttrib[0][$attr] = strtolower((string)($tagAttrib[0][$attr] ?? ''));
                                        }
                                        if (!empty($params['upper'])) {
                                            $tagAttrib[0][$attr] = strtoupper((string)($tagAttrib[0][$attr] ?? ''));
                                        }
                                        if (!empty($params['range'])) {
                                            if (isset($params['range'][1])) {
                                                $tagAttrib[0][$attr] = MathUtility::forceIntegerInRange($tagAttrib[0][$attr] ?? 0, (int)$params['range'][0], (int)$params['range'][1]);
                                            } else {
                                                $tagAttrib[0][$attr] = MathUtility::forceIntegerInRange($tagAttrib[0][$attr] ?? 0, (int)$params['range'][0]);
                                            }
                                        }
                                        if (isset($params['list']) && is_array($params['list'])) {
                                            // For the class attribute, remove from the attribute value any class not in the list
                                            // Classes are case sensitive
                                            if ($attr === 'class') {
                                                $newClasses = [];
                                                $classes = GeneralUtility::trimExplode(' ', (string)($tagAttrib[0][$attr] ?? ''), true);
                                                foreach ($classes as $class) {
                                                    if (in_array($class, $params['list'])) {
                                                        $newClasses[] = $class;
                                                    }
                                                }
                                                if (!empty($newClasses)) {
                                                    $tagAttrib[0][$attr] = implode(' ', $newClasses);
                                                } else {
                                                    $tagAttrib[0][$attr] = $params['list'][0];
                                                }
                                            } else {
                                                if (!in_array($this->caseShift($tagAttrib[0][$attr] ?? null, $caseSensitive), (array)$this->caseShift($params['list'], $caseSensitive, $tagName))) {
                                                    $tagAttrib[0][$attr] = $params['list'][0];
                                                }
                                            }
                                        }
                                        $removeIfFalse = $params['removeIfFalse'] ?? false;
                                        $currentValue = $tagAttrib[0][$attr] ?? null;
                                        if (
                                            ($removeIfFalse && $removeIfFalse !== 'blank' && !$currentValue)
                                            || ($removeIfFalse === 'blank' && (string)$currentValue === '')
                                        ) {
                                            unset($tagAttrib[0][$attr]);
                                        }
                                        if (
                                            (string)($params['removeIfEquals'] ?? '') !== ''
                                            && $this->caseShift($tagAttrib[0][$attr] ?? null, $caseSensitive) === $this->caseShift($params['removeIfEquals'], $caseSensitive)
                                        ) {
                                            unset($tagAttrib[0][$attr]);
                                        }
                                        if (!empty($params['prefixLocalAnchors'])) {
                                            $anchorValue = (string)($tagAttrib[0][$attr] ?? '');
                                            if (($anchorValue[0] ?? '') === '#') {
                                                if ($params['prefixLocalAnchors'] == 2) {
                                                    /** @var ContentObjectRenderer $contentObjectRenderer */
                                                    $contentObjectRenderer = GeneralUtility::makeInstance(ContentObjectRenderer::class);
                                                    $prefix = $contentObjectRenderer->getUrlToCurrentLocation();
                                                } else {
                                                    $prefix = GeneralUtility::getIndpEnv('TYPO3_REQUEST_URL');
                                                }
                                                $tagAttrib[0][$attr] = $prefix . $tagAttrib[0][$attr];
                                            }
                                        }
                                        if (!empty($params['prefixRelPathWith'])) {
                                            // parse_url() returns false on seriously malformed URLs and omits absent components
                                            $urlParts = parse_url((string)($tagAttrib[0][$attr] ?? '')) ?: [];
                                            if (empty($urlParts['scheme']) && ($urlParts['path'][0] ?? '') !== '/') {
                                                // If it is NOT an absolute URL (by http: or starting "/")
                                                $tagAttrib[0][$attr] = $params['prefixRelPathWith'] . ($tagAttrib[0][$attr] ?? '');
                                            }
                                        }
                                        if (!empty($params['userFunc'])) {
                                            if (is_array($params['userFunc.'] ?? null)) {
                                                $params['userFunc.']['attributeValue'] = $tagAttrib[0][$attr] ?? null;
                                            } else {
                                                $params['userFunc.'] = $tagAttrib[0][$attr] ?? null;
                                            }
                                            $tagAttrib[0][$attr] = GeneralUtility::callUserFunction($params['userFunc'], $params['userFunc.'], $this);
                                        }
                                    }
                                }
                                $tagParts[1] = $this->compileTagAttribs($tagAttrib[0], $tagAttrib[1]);
                            }
                        } else {
                            // If endTag, remove any possible attributes:
                            $tagParts[1] = '';
                        }
                        // Protecting the tag by converting < and > to &lt; and &gt; ??
                        if (!empty($tagConf['protect'])) {
                            $lt = '&lt;';
                            $gt = '&gt;';
                        } else {
                            $lt = '<';
                            $gt = '>';
                        }
                        // Remapping tag name?
                        if (!empty($tagConf['remap'])) {
                            $tagParts[0] = $tagConf['remap'];
                        }
                        // rmTagIfNoAttrib
                        if ($endTag || empty($tagConf['rmTagIfNoAttrib']) || trim($tagParts[1] ?? '')) {
                            $setTag = true;
                            // Remove this closing tag if $tagName was among $TSconfig['removeTags']
                            if (
                                $endTag
                                && isset($tagConf['allowedAttribs'])
                                && $tagConf['allowedAttribs'] === 0
                                && ($tagConf['rmTagIfNoAttrib'] ?? null) === 1
                            ) {
                                $setTag = false;
                            }
                            if (isset($tagConf['nesting'])) {
                                if (!isset($tagRegister[$tagName])) {
                                    $tagRegister[$tagName] = [];
                                }
                                if ($endTag) {
                                    $correctTag = true;
                                    if ($tagConf['nesting'] === 'global') {
                                        $lastEl = end($tagStack);
                                        if ($tagName !== $lastEl) {
                                            if (in_array($tagName, $tagStack, true)) {
                                                // Discard all tags that were opened after the matching start tag
                                                while (!empty($tagStack) && $tagName !== $lastEl) {
                                                    $elPos = end($tagRegister[$lastEl]);
                                                    unset($newContent[$elPos]);
                                                    array_pop($tagRegister[$lastEl]);
                                                    array_pop($tagStack);
                                                    $lastEl = end($tagStack);
                                                }
                                            } else {
                                                // The end tag has no start tag on the stack at all
                                                $correctTag = false;
                                            }
                                        }
                                    }
                                    if (empty($tagRegister[$tagName]) || !$correctTag) {
                                        $setTag = false;
                                    } else {
                                        array_pop($tagRegister[$tagName]);
                                        if ($tagConf['nesting'] === 'global') {
                                            array_pop($tagStack);
                                        }
                                    }
                                } else {
                                    $tagRegister[$tagName][] = $c;
                                    if ($tagConf['nesting'] === 'global') {
                                        $tagStack[] = $tagName;
                                    }
                                }
                            }
                            if ($setTag) {
                                // Setting the tag
                                $newContent[$c++] = $lt . ($endTag ? '/' : '') . trim($tagParts[0] . ' ' . ($tagParts[1] ?? '')) . ($emptyTag ? ' /' : '') . $gt;
                            }
                        }
                    } else {
                        $newContent[$c++] = '<' . ($endTag ? '/' : '') . $tagContent . '>';
                    }
                } elseif ($keepAll) {
                    // This is if the tag was not defined in the array for processing:
                    if ($keepAll === 'protect') {
                        $lt = '&lt;';
                        $gt = '&gt;';
                    } else {
                        $lt = '<';
                        $gt = '>';
                    }
                    $newContent[$c++] = $lt . ($endTag ? '/' : '') . $tagContent . $gt;
                }
                $newContent[$c++] = $this->bidir_htmlspecialchars(substr($tok, $tagEnd + 1), $hSC);
            } else {
                $newContent[$c++] = $this->bidir_htmlspecialchars('<' . $tok, $hSC);
            }
        } else {
            $newContent[$c++] = $this->bidir_htmlspecialchars(($skipTag ? '' : '<') . $tok, $hSC);
            // It was not a tag anyways
            $skipTag = false;
        }
    }
    // Unsetting tags: start tags under nesting control that never got their end tag
    foreach ($tagRegister as $positions) {
        foreach ($positions as $pKey) {
            unset($newContent[$pKey]);
        }
    }
    $newContent = implode('', $newContent);
    $newContent = $this->stripEmptyTagsIfConfigured($newContent, $addConfig);
    return $newContent;
}
670
|
|
|
|
671
|
|
|
/**
 * Converts htmlspecialchars forth ($dir=1 or $dir=2) AND back ($dir=-1).
 *
 * @param string $value Input value
 * @param int $dir Direction: 1 = encode, 2 = encode without double-encoding existing entities, -1 = decode, any other value returns $value unchanged
 * @return string Output value
 */
public function bidir_htmlspecialchars($value, $dir)
{
    $direction = (int)$dir;
    if ($direction === 1) {
        return htmlspecialchars($value);
    }
    if ($direction === 2) {
        // Last argument disables double encoding, so existing entities are preserved
        return htmlspecialchars($value, ENT_COMPAT, 'UTF-8', false);
    }
    if ($direction === -1) {
        return htmlspecialchars_decode($value);
    }
    return $value;
}
691
|
|
|
|
692
|
|
|
/**
 * Prefixes relative resource paths found in tag attributes and inside <style> blocks of $content.
 *
 * Attributes handled per tag: "background" (td, body, table), "src" (img, input, script, embed),
 * "href" (link, a), "action" (form) and "value" of a <param> tag whose "name" is "movie".
 * A tag is only re-compiled if its attribute is present and non-empty; all other tags are
 * passed through untouched.
 *
 * @param string $main_prefix Prefix string
 * @param string $content HTML content
 * @param array $alternatives Array with alternative prefixes for certain of the tags. key=>value pairs where the keys are the tag element names in uppercase ("style" in lowercase for the <style> section)
 * @param string $suffix Suffix string (put after the resource).
 * @return string Processed HTML content
 */
public function prefixResourcePath($main_prefix, $content, $alternatives = [], $suffix = '')
{
    // Attribute that carries the resource path, keyed by lowercase tag name
    $pathAttributes = [
        'td' => 'background',
        'body' => 'background',
        'table' => 'background',
        'img' => 'src',
        'input' => 'src',
        'script' => 'src',
        'embed' => 'src',
        'link' => 'href',
        'a' => 'href',
        'form' => 'action',
    ];
    $parts = $this->splitTags('embed,td,table,body,img,input,form,link,script,a,param', $content);
    foreach ($parts as $k => $v) {
        // Only the odd entries are treated as tags
        if (!($k % 2)) {
            continue;
        }
        $params = $this->get_tag_attributes($v);
        // Detect tag-ending so that it is re-applied correctly.
        $tagEnd = substr($v, -2) === '/>' ? ' />' : '>';
        // The 'name' of the first tag
        $firstTagName = $this->getFirstTagName($v);
        $tagName = strtolower($firstTagName);
        $prefix = $alternatives[strtoupper($firstTagName)] ?? $main_prefix;
        if ($tagName === 'param') {
            // <param> only carries a path when it is the "movie" parameter
            $attribute = ($params[0]['name'] ?? '') === 'movie' ? 'value' : null;
        } else {
            $attribute = $pathAttributes[$tagName] ?? null;
        }
        // empty() covers both a missing attribute (no undefined index access)
        // and a falsy value, which was never prefixed
        if ($attribute === null || empty($params[0][$attribute])) {
            continue;
        }
        $params[0][$attribute] = $this->prefixRelPath($prefix, $params[0][$attribute], $suffix);
        $compiledAttributes = $this->compileTagAttribs($params[0], $params[1] ?? []);
        $parts[$k] = '<' . trim($tagName . ' ' . $compiledAttributes) . $tagEnd;
    }
    $content = implode('', $parts);
    // Fix <style> section:
    $prefix = $alternatives['style'] ?? $main_prefix;
    if ((string)$prefix !== '') {
        $parts = $this->splitIntoBlock('style', $content);
        foreach ($parts as $k => $part) {
            if ($k % 2) {
                // A callback is used instead of a replacement string so that a prefix/suffix
                // starting with a digit or containing "$1" / "\1" is inserted literally and
                // not interpreted as (part of) a backreference.
                $parts[$k] = preg_replace_callback(
                    '/(url[[:space:]]*\\([[:space:]]*["\']?)([^"\')]*)(["\']?[[:space:]]*\\))/i',
                    static function (array $match) use ($prefix, $suffix) {
                        return $match[1] . $prefix . $match[2] . $suffix . $match[3];
                    },
                    $part
                );
            }
        }
        $content = implode('', $parts);
    }
    return $content;
}
786
|
|
|
|
787
|
|
|
/**
 * Internal sub-function for ->prefixResourcePath()
 *
 * Paths starting with "/" or "#" and URLs that carry a scheme are returned unchanged;
 * everything else (including an empty string) is wrapped in $prefix and $suffix.
 *
 * @param string $prefix Prefix string
 * @param string $srcVal Relative path/URL
 * @param string $suffix Suffix string
 * @return string Output path, prefixed if no scheme in input string
 * @internal
 */
public function prefixRelPath($prefix, $srcVal, $suffix = '')
{
    // "??" avoids the uninitialized string offset warning on an empty value
    $firstCharacter = $srcVal[0] ?? '';
    // Only prefix if it's not an absolute URL or
    // only a link to a section within the page.
    if ($firstCharacter === '/' || $firstCharacter === '#') {
        return $srcVal;
    }
    $urlParts = parse_url((string)$srcVal);
    // Only prefix URLs without a scheme. empty() also copes with the "scheme" key
    // being absent (the normal case for relative paths) and with parse_url()
    // returning false for seriously malformed input.
    if (empty($urlParts['scheme'])) {
        $srcVal = $prefix . $srcVal . $suffix;
    }
    return $srcVal;
}
809
|
|
|
|
810
|
|
|
/**
 * Internal function that upper-cases a string or every value of an array,
 * unless a case sensitive comparison is requested.
 *
 * @param mixed $str Input string/array
 * @param bool $caseSensitiveComparison If TRUE the input is returned as is, if FALSE it is returned in uppercase
 * @param string $cacheKey Key string used for internal caching of array results. Could be an MD5 hash of the serialized version of the input $str if that is an array.
 * @return array|string Output string/array, processed
 * @internal
 */
public function caseShift($str, $caseSensitiveComparison, $cacheKey = '')
{
    if ($caseSensitiveComparison) {
        return $str;
    }
    if (!is_array($str)) {
        return strtoupper($str);
    }
    // Arrays are served from the runlevel cache when a key is given and known
    if ($cacheKey && isset($this->caseShift_cache[$cacheKey])) {
        return $this->caseShift_cache[$cacheKey];
    }
    // array_map() with a single array keeps the original keys
    $shifted = array_map('strtoupper', $str);
    if ($cacheKey) {
        $this->caseShift_cache[$cacheKey] = $shifted;
    }
    return $shifted;
}
841
|
|
|
|
842
|
|
|
/**
 * Builds the attribute string of a tag from an attribute array.
 *
 * @param array $tagAttrib Tag attributes (name => value)
 * @param array $meta Meta information about these attributes: "origTag" (name to output instead of the array key) and "dashType" (quote character to wrap the value in)
 * @return string Imploded attributes, eg: 'attribute="value" attrib2="value2"'
 * @internal
 */
public function compileTagAttribs($tagAttrib, $meta = [])
{
    $compiled = [];
    foreach ($tagAttrib as $name => $value) {
        $attribute = $meta[$name]['origTag'] ?? $name;
        $hasValue = strcmp($value, '') !== 0;
        $hasQuoteInfo = isset($meta[$name]['dashType']);
        // Attributes without value and without quote information are rendered as bare names
        if ($hasValue || $hasQuoteInfo) {
            if ($hasQuoteInfo) {
                $quote = $meta[$name]['dashType'];
            } else {
                // No quote information: integers stay unquoted, everything else gets double quotes
                $quote = MathUtility::canBeInterpretedAsInteger($value) ? '' : '"';
            }
            $attribute .= '=' . $quote . $value . $quote;
        }
        $compiled[] = $attribute;
    }
    return implode(' ', $compiled);
}
863
|
|
|
|
864
|
|
|
/**
 * Converts TSconfig into an array for the HTMLcleaner function.
 *
 * Evaluated TSconfig keys: allowTags, tags., localNesting, globalNesting, rmTagIfNoAttrib,
 * noAttrib, removeTags, stripEmptyTags, stripEmptyTags., keepNonMatchedTags, htmlSpecialChars.
 *
 * @param array $TSconfig TSconfig for HTMLcleaner
 * @param array $keepTags Array of tags to keep (?)
 * @return array Numeric array: [0] tag configuration keyed by tag name, [1] the keepNonMatchedTags value as string, [2] the htmlSpecialChars value as integer, [3] additional configuration (stripEmptyTags settings)
 * @internal
 */
public function HTMLparserConfig($TSconfig, $keepTags = [])
{
    // Allow tags (base list, merged with incoming array)
    $alTags = array_flip(GeneralUtility::trimExplode(',', strtolower($TSconfig['allowTags'] ?? ''), true));
    $keepTags = array_merge($alTags, $keepTags);
    // Set config properties.
    if (isset($TSconfig['tags.']) && is_array($TSconfig['tags.'])) {
        // First pass: plain "tagname = 0|1" switches. Keys containing uppercase characters
        // are ignored (loose comparison kept on purpose, keys may be integers).
        foreach ($TSconfig['tags.'] as $key => $tagC) {
            if (is_array($tagC) || $key != strtolower($key)) {
                continue;
            }
            if ((string)$tagC === '0') {
                unset($keepTags[$key]);
            }
            if ((string)$tagC === '1' && !isset($keepTags[$key])) {
                $keepTags[$key] = 1;
            }
        }
        // Second pass: "tagname." sub arrays with detailed tag configuration
        foreach ($TSconfig['tags.'] as $key => $tagC) {
            if (!is_array($tagC) || $key != strtolower($key)) {
                continue;
            }
            // Strip the trailing dot of the TypoScript key
            $key = substr($key, 0, -1);
            if (!is_array($keepTags[$key] ?? null)) {
                $keepTags[$key] = [];
            }
            if (isset($tagC['fixAttrib.']) && is_array($tagC['fixAttrib.'])) {
                foreach ($tagC['fixAttrib.'] as $atName => $atConfig) {
                    if (!is_array($atConfig)) {
                        continue;
                    }
                    $atName = substr($atName, 0, -1);
                    $attributeConfig = $keepTags[$key]['fixAttrib'][$atName] ?? null;
                    if (!is_array($attributeConfig)) {
                        $attributeConfig = [];
                    }
                    $attributeConfig = array_merge($attributeConfig, $atConfig);
                    // "range" and "list" are given comma separated and stored as arrays;
                    // values that already are arrays are left as they are
                    foreach (['range', 'list'] as $listOption) {
                        $listValue = $attributeConfig[$listOption] ?? '';
                        if (!is_array($listValue) && (string)$listValue !== '') {
                            $attributeConfig[$listOption] = GeneralUtility::trimExplode(',', $listValue);
                        }
                    }
                    $keepTags[$key]['fixAttrib'][$atName] = $attributeConfig;
                }
            }
            unset($tagC['fixAttrib.'], $tagC['fixAttrib']);
            if (!empty($tagC['rmTagIfNoAttrib']) && empty($tagC['nesting'])) {
                $tagC['nesting'] = 1;
            }
            $keepTags[$key] = array_merge($keepTags[$key], $tagC);
        }
    }
    // Returns the tag names listed in the given TSconfig option that are present in
    // $keepTags, making sure each of them holds an array that can take further settings.
    $keptTagsListedIn = static function ($option) use ($TSconfig, &$keepTags) {
        if (empty($TSconfig[$option])) {
            return [];
        }
        $tagNames = [];
        foreach (GeneralUtility::trimExplode(',', strtolower($TSconfig[$option]), true) as $tagName) {
            if (!isset($keepTags[$tagName])) {
                continue;
            }
            if (!is_array($keepTags[$tagName])) {
                $keepTags[$tagName] = [];
            }
            $tagNames[] = $tagName;
        }
        return $tagNames;
    };
    // LocalNesting
    foreach ($keptTagsListedIn('localNesting') as $tn) {
        $keepTags[$tn]['nesting'] = 1;
    }
    foreach ($keptTagsListedIn('globalNesting') as $tn) {
        $keepTags[$tn]['nesting'] = 'global';
    }
    foreach ($keptTagsListedIn('rmTagIfNoAttrib') as $tn) {
        $keepTags[$tn]['rmTagIfNoAttrib'] = 1;
        if (empty($keepTags[$tn]['nesting'])) {
            $keepTags[$tn]['nesting'] = 1;
        }
    }
    foreach ($keptTagsListedIn('noAttrib') as $tn) {
        $keepTags[$tn]['allowedAttribs'] = 0;
    }
    // removeTags overrides whatever was configured for the tag before
    if (!empty($TSconfig['removeTags'])) {
        foreach (GeneralUtility::trimExplode(',', strtolower($TSconfig['removeTags']), true) as $tn) {
            $keepTags[$tn] = [
                'allowedAttribs' => 0,
                'rmTagIfNoAttrib' => 1,
            ];
        }
    }
    // Create additional configuration:
    $addConfig = [];
    if (isset($TSconfig['stripEmptyTags'])) {
        $addConfig['stripEmptyTags'] = $TSconfig['stripEmptyTags'];
        if (isset($TSconfig['stripEmptyTags.'])) {
            $addConfig['stripEmptyTags.'] = $TSconfig['stripEmptyTags.'];
        }
    }
    return [
        $keepTags,
        '' . ($TSconfig['keepNonMatchedTags'] ?? ''),
        (int)($TSconfig['htmlSpecialChars'] ?? 0),
        $addConfig
    ];
}
991
|
|
|
|
992
|
|
|
/**
 * Strips empty tags from HTML.
 *
 * A tag counts as empty if nothing but spaces stands between its opening and closing tag.
 * The replacement is repeated until nothing matches anymore, so tags that only wrap
 * other empty tags are removed as well.
 *
 * @param string $content The content to be stripped of empty tags
 * @param string $tagList The comma separated list of tags to be stripped.
 *                        If empty, all empty tags will be stripped
 * @param bool $treatNonBreakingSpaceAsEmpty If TRUE tags containing only &nbsp; entities (or raw UTF-8 no-break space characters) will be treated as empty.
 * @param bool $keepTags If true, the provided tags will be kept instead of stripped.
 * @return string the stripped content
 */
public function stripEmptyTags($content, $tagList = '', $treatNonBreakingSpaceAsEmpty = false, $keepTags = false)
{
    if (!empty($tagList)) {
        $tagRegEx = implode('|', GeneralUtility::trimExplode(',', $tagList, true));
        if ($keepTags) {
            // Negative lookahead: any tag name except the listed ones
            $tagRegEx = '(?!' . $tagRegEx . ')[^ >]+';
        }
    } else {
        // all characters until you reach a > or space;
        $tagRegEx = '[^ >]+';
    }
    // What may stand between opening and closing tag for the tag to count as empty.
    // The entity has to be spelled out in the pattern; the raw no-break space is
    // matched by its UTF-8 byte sequence.
    $emptyContentRegex = $treatNonBreakingSpaceAsEmpty ? '(?: |&nbsp;|\\xC2\\xA0)' : ' ';
    $finalRegex = '/<(' . $tagRegEx . ')[^>]*>' . $emptyContentRegex . '*<\\/\\1[^>]*>/i';
    do {
        // Keep the current content if PCRE reports an error (preg_replace() returns null then)
        $content = preg_replace($finalRegex, '', $content, -1, $count) ?? $content;
    } while ($count !== 0);
    return $content;
}
1020
|
|
|
|
1021
|
|
|
/**
 * Strips empty tags from the HTML code if the configuration asks for it.
 *
 * Nothing happens unless "stripEmptyTags" is set. The sub array "stripEmptyTags." may
 * provide "keepTags" (takes precedence) or "tags" as tag list and the flag
 * "treatNonBreakingSpaceAsEmpty".
 *
 * @param string $value
 * @param array $configuration
 * @return string
 */
protected function stripEmptyTagsIfConfigured($value, $configuration)
{
    if (empty($configuration['stripEmptyTags'])) {
        return $value;
    }

    $options = $configuration['stripEmptyTags.'] ?? [];

    // "keepTags" inverts the meaning of the tag list and wins over "tags"
    $keepListedTags = !empty($options['keepTags']);
    $tagList = null;
    if ($keepListedTags) {
        $tagList = $options['keepTags'];
    } elseif (!empty($options['tags'])) {
        $tagList = $options['tags'];
    }

    return $this->stripEmptyTags(
        $value,
        $tagList,
        !empty($options['treatNonBreakingSpaceAsEmpty']),
        $keepListedTags
    );
}
1047
|
|
|
} |
1048
|
|
|
|