Total Complexity | 230 |
Total Lines | 1020 |
Duplicated Lines | 0 % |
Changes | 0 |
Complex classes like HtmlParser often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use HtmlParser, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
26 | class HtmlParser |
||
27 | { |
||
28 | /** |
||
29 | * @var array |
||
30 | */ |
||
31 | protected $caseShift_cache = []; |
||
32 | |||
33 | // Void elements that do not have closing tags, as defined by HTML5, except link element |
||
34 | const VOID_ELEMENTS = 'area|base|br|col|command|embed|hr|img|input|keygen|meta|param|source|track|wbr'; |
||
35 | |||
36 | /************************************ |
||
37 | * |
||
38 | * Parsing HTML code |
||
39 | * |
||
40 | ************************************/ |
||
/**
 * Splits $content into an alternating list of "outside" and "block" segments for the given tags.
 *
 * Entries with even indexes hold the content found outside of the listed tags, entries with odd
 * indexes hold a complete block (opening tag, inner content and closing tag). Occurrences of the
 * listed tags that are nested inside an open block stay part of that block.
 * Use ->removeFirstAndLastTag() on a block entry to strip its outermost tag pair if needed.
 *
 * @param string $tag List of tags, comma separated.
 * @param string $content HTML-content
 * @param bool $eliminateExtraEndTags If set, end tags without a preceding start tag are left out of the result - you should probably set this in most cases.
 * @return array Even indexes: content outside the blocks. Odd indexes: block content.
 * @see splitTags()
 * @see removeFirstAndLastTag()
 */
public function splitIntoBlock($tag, $content, $eliminateExtraEndTags = false)
{
    $quotedNames = array_map(
        static function ($name) {
            return preg_quote($name, '/');
        },
        array_unique(GeneralUtility::trimExplode(',', $tag, true))
    );
    $pattern = '/\\<\\/?(' . implode('|', $quotedNames) . ')(\\s*\\>|\\s[^\\>]*\\>)/si';
    $segments = preg_split($pattern, $content);
    if (empty($segments)) {
        return [];
    }
    $result = [];
    // The first segment precedes every matched tag. $offset always points at the
    // next matched tag inside $content, $current collects the entry being built.
    $current = $segments[0];
    $offset = strlen($current);
    $depth = 0;
    foreach (array_slice($segments, 1) as $segment) {
        // Length of the matched tag, including its closing bracket
        $tagLength = strcspn(substr($content, $offset), '>') + 1;
        if (substr($content, $offset, 2) === '</') {
            // End tag: one level up
            $depth--;
            $dropped = $eliminateExtraEndTags && $depth < 0;
            if ($dropped) {
                // Surplus end tag: it is skipped and the level is reset to ground level
                $depth = 0;
            } else {
                $current .= substr($content, $offset, $tagLength);
            }
            $offset += $tagLength;
            // Back on ground level by a regular end tag: the block is complete
            if ($depth === 0 && !$dropped) {
                $result[] = $current;
                $current = '';
            }
            $following = substr($content, $offset, strlen($segment));
        } else {
            // Start tag on ground level: store what was collected outside of the blocks
            if ($depth === 0) {
                $result[] = $current;
                $current = '';
            }
            $depth++;
            // The tag itself plus the content following it
            $following = substr($content, $offset, strlen($segment) + $tagLength);
        }
        $offset += strlen($following);
        $current .= $following;
    }
    $result[] = $current;
    return $result;
}
||
115 | |||
116 | /** |
||
117 | * Splitting content into blocks *recursively* and processing tags/content with call back functions. |
||
118 | * |
||
119 | * @param string $tag Tag list, see splitIntoBlock() |
||
120 | * @param string $content Content, see splitIntoBlock() |
||
121 | * @param object $procObj Object where call back methods are. |
||
122 | * @param string $callBackContent Name of call back method for content; "function callBackContent($str,$level)" |
||
123 | * @param string $callBackTags Name of call back method for tags; "function callBackTags($tags,$level)" |
||
124 | * @param int $level Indent level |
||
125 | * @return string Processed content |
||
126 | * @see splitIntoBlock() |
||
127 | */ |
||
128 | public function splitIntoBlockRecursiveProc($tag, $content, &$procObj, $callBackContent, $callBackTags, $level = 0) |
||
150 | } |
||
151 | |||
/**
 * Splits $content at every single occurrence of the tags given in $tag.
 *
 * Entries with even indexes hold the content found between the matched tags, entries with odd
 * indexes hold the matched tag itself (from "<" up to and including the next ">").
 * Closing tags are not matched by this method.
 *
 * @param string $tag List of tags, comma separated.
 * @param string $content HTML-content
 * @return array Even indexes: content between the tags. Odd indexes: the tags.
 * @see splitIntoBlock()
 * @see removeFirstAndLastTag()
 */
public function splitTags($tag, $content)
{
    $quotedNames = array_map(
        static function ($name) {
            return preg_quote($name, '/');
        },
        GeneralUtility::trimExplode(',', $tag, true)
    );
    $pattern = '/\\<(' . implode('|', $quotedNames) . ')(\\s[^>]*)?\\/?>/si';
    $segments = preg_split($pattern, $content);
    if (empty($segments)) {
        return [];
    }
    // Everything in front of the first matched tag
    $leading = array_shift($segments);
    $result = [$leading];
    $offset = strlen($leading);
    foreach ($segments as $segment) {
        // The matched tag is read from the original content, as preg_split() drops the delimiters
        $matchedTag = substr($content, $offset, strcspn(substr($content, $offset), '>') + 1);
        $result[] = $matchedTag;
        $result[] = $segment;
        $offset += strlen($matchedTag) + strlen($segment);
    }
    return $result;
}
||
193 | |||
/**
 * Removes the first and last tag in the string
 * Anything before the first and after the last tags respectively is also removed
 *
 * The result is the part of $str between the first ">" and the last "<". If $str
 * does not contain a ">" followed somewhere later by a "<", there is no content
 * enclosed by two tags and an empty string is returned.
 *
 * @param string $str String to process
 * @return string Content between the first and the last tag, or an empty string
 */
public function removeFirstAndLastTag($str)
{
    $str = (string)$str;
    // End of first tag:
    $start = strpos($str, '>');
    // Begin of last tag:
    $end = strrpos($str, '<');
    // strpos()/strrpos() return false if nothing was found. Using that in the offset
    // arithmetic below would cut an arbitrary piece out of the input.
    if ($start === false || $end === false || $end < $start) {
        return '';
    }
    return substr($str, $start + 1, $end - $start - 1);
}
||
210 | |||
/**
 * Returns the first tag in $str
 *
 * The returned string reaches from the very beginning of $str up to and including the
 * first ">", so make sure that the tag is the first thing in $str.
 *
 * @param string $str HTML string with tags
 * @return string The leading part of $str including the first ">", or an empty string if $str contains no ">"
 */
public function getFirstTag($str)
{
    $closingBracket = strpos($str, '>');
    if ($closingBracket === false) {
        return '';
    }
    return substr($str, 0, $closingBracket + 1);
}
||
224 | |||
/**
 * Returns the NAME of the first tag in $str
 *
 * Only whitespace may precede the tag. The name consists of all characters after "<"
 * up to the first whitespace character or ">".
 *
 * @param string $str HTML tag
 * @param bool $preserveCase If set, then the tag is NOT converted to uppercase but case is preserved.
 * @return string Tag name, in upper case unless $preserveCase is set. Empty string if $str does not start with a tag.
 * @see getFirstTag()
 */
public function getFirstTagName($str, $preserveCase = false)
{
    $found = [];
    if (preg_match('/^\\s*\\<([^\\s\\>]+)(\\s|\\>)/', $str, $found) !== 1) {
        return '';
    }
    return $preserveCase ? $found[1] : strtoupper($found[1]);
}
||
244 | |||
/**
 * Returns the attributes of a tag together with meta data about each attribute.
 *
 * The attribute names are lower-cased and used as keys of the first returned array.
 * An attribute without a value (shorthand) gets an empty string as value, use isset()
 * to find out whether it was present.
 *
 * The second returned array uses the same keys and holds per attribute:
 * - "origTag": the attribute name as it was written (reduced to alphanumeric characters, "_", ":" and "-")
 * - "dashType": the quote character the value was enclosed in (empty if unquoted), only set if a value was given
 *
 * @param string $tag Tag: $tag is either a whole tag (eg '<TAG OPTION ATTRIB=VALUE>') or the parameterlist (ex ' OPTION ATTRIB=VALUE>')
 * @param bool $deHSC If set, the attribute values are de-htmlspecialchar'ed. Should actually always be set!
 * @return array array(Tag attributes,Attribute meta-data)
 */
public function get_tag_attributes($tag, $deHSC = false)
{
    [$components, $metaC] = $this->split_tag_attributes($tag);
    if (!is_array($components)) {
        return [null, null];
    }
    $attributes = [];
    $attributesMeta = [];
    // Lower-cased name of the attribute that may still receive a value
    $pendingName = '';
    // TRUE right after a "=" component: the next component is a value, not a name
    $expectValue = false;
    foreach ($components as $index => $component) {
        if ($component === '=') {
            $expectValue = true;
            continue;
        }
        if ($expectValue) {
            // A value is only stored if there is an attribute waiting for it
            if ($pendingName) {
                $attributes[$pendingName] = $deHSC ? htmlspecialchars_decode($component) : $component;
                $attributesMeta[$pendingName]['dashType'] = $metaC[$index];
                $pendingName = '';
            }
        } else {
            $originalName = preg_replace('/[^[:alnum:]_\\:\\-]/', '', $component) ?? '';
            if ($originalName) {
                $pendingName = strtolower((string)$originalName);
                $attributesMeta[$pendingName] = ['origTag' => $originalName];
                $attributes[$pendingName] = '';
            }
        }
        $expectValue = false;
    }
    return [$attributes, $attributesMeta];
}
||
292 | |||
/**
 * Splits an attribute list into its single components (names, "=" signs and values).
 * The result is normally analyzed by get_tag_attributes
 * A leading tag name followed by whitespace and a trailing ">" are removed before splitting.
 *
 * Two arrays with matching indexes are returned: the first one holds the components
 * (values without their enclosing quotes), the second one holds the quote character
 * (" or ') a component was enclosed in, or an empty string for unquoted components.
 *
 * @param string $tag The tag or attributes
 * @return array array(components, quote characters of the components)
 * @internal
 * @see \TYPO3\CMS\Core\Utility\GeneralUtility::split_tag_attributes()
 */
public function split_tag_attributes($tag)
{
    $tagMatch = [];
    if (preg_match('/(\\<[^\\s]+\\s+)?(.*?)\\s*(\\>)?$/s', $tag, $tagMatch) !== 1) {
        return [[], []];
    }
    $values = [];
    $quotes = [];
    $tokens = [];
    // A token is a double quoted string, a single quoted string, a "=" or a run of other non-whitespace characters
    if (preg_match_all('/("[^"]*"|\'[^\']*\'|[^\\s"\'\\=]+|\\=)/s', $tagMatch[2], $tokens) > 0) {
        foreach ($tokens[1] as $token) {
            $isQuoted = $token[0] === '"' || $token[0] === '\'';
            $quotes[] = $isQuoted ? $token[0] : '';
            $values[] = $isQuoted ? substr($token, 1, -1) : $token;
        }
    }
    return [$values, $quotes];
}
||
331 | |||
332 | /********************************* |
||
333 | * |
||
334 | * Clean HTML code |
||
335 | * |
||
336 | *********************************/ |
||
/**
 * Function that can clean up HTML content according to configuration given in the $tags array.
 *
 * The content is split at every "<". Comments (<!-- ... -->) and script-style CDATA sections
 * are copied unchanged, every other tag is kept, modified or dropped depending on $tags and $keepAll.
 *
 * Initializing the $tags array to allow a list of tags (in this case <B>,<I>,<U> and <A>), set it like this: $tags = array_flip(explode(',','b,a,i,u'))
 * If the value of the $tags[$tagname] entry is an array, advanced processing of the tags is initialized. These are the options:
 *
 * $tags[$tagname] = Array(
 *     'overrideAttribs' => '' If set, this string is preset as the attributes of the tag
 *     'allowedAttribs' => '0' (zero) = no attributes allowed, '[commalist of attributes]' = only allowed attributes. If blank, all attributes are allowed.
 *     'fixAttrib' => Array(
 *         '[attribute name]' => Array (
 *             'set' => Force the attribute value to this value.
 *             'unset' => Boolean: If set, the attribute is unset.
 *             'default' => If no attribute exists by this name, this value is set as default value (if this value is not blank)
 *             'always' => Boolean. If set, the attribute is always processed. Normally an attribute is processed only if it exists
 *             'trim,intval,lower,upper' => All booleans. If any of these keys are set, the value is passed through the respective PHP-functions.
 *             'range' => Array ('[low limit]','[high limit, optional]') Setting integer range.
 *             'list' => Array ('[value1/default]','[value2]','[value3]') Attribute must be in this list. If not, the value is set to the first element.
 *             'removeIfFalse' => Boolean/'blank'. If set, then the attribute is removed if it is 'FALSE'. If this value is set to 'blank' then the value must be a blank string (that means a 'zero' value will not be removed)
 *             'removeIfEquals' => [value] If the attribute value matches the value set here, then it is removed.
 *             'casesensitiveComp' => 1 If set, then the removeIfEquals and list comparisons will be case sensitive. Otherwise not.
 *             'prefixLocalAnchors' => If set, values starting with "#" are prefixed with the current URL (value 2: URL taken from the content object renderer)
 *             'prefixRelPathWith' => String. Prefix for values that have no URL scheme and do not start with "/"
 *             'userFunc' => User function the value is passed to, its return value becomes the new value
 *         )
 *     ),
 *     'protect' => '', Boolean. If set, the tag <> is converted to &lt; and &gt;
 *     'remap' => '', String. If set, the tagname is remapped to this tagname
 *     'rmTagIfNoAttrib' => '', Boolean. If set, then the tag is removed if no attributes happened to be there.
 *     'nesting' => '', Boolean/'global'. If set TRUE, then this tag must have starting and ending tags in the correct order. Any tags not in this order will be discarded. Thus '</B><B><I></B></I></B>' will be converted to '<B><I></B></I>'. Is the value 'global' then true nesting in relation to other tags marked for 'global' nesting control is preserved. This means that if <B> and <I> are set for global nesting then this string '</B><B><I></B></I></B>' is converted to '<B></B>'
 * )
 *
 * @param string $content Is the HTML-content being processed. This is also the result being returned.
 * @param array $tags Is an array where each key is a tagname in lowercase. Only tags present as keys in this array are preserved. The value of the key can be an array with a vast number of options to configure.
 * @param mixed $keepAll Boolean/'protect', if set, then all tags are kept regardless of tags present as keys in $tags-array. If 'protect' then the preserved tags have their <> converted to &lt; and &gt;
 * @param int $hSC Values -1,0,1,2: Set to zero= disabled, set to 1 then the content BETWEEN tags is htmlspecialchar()'ed, set to -1 its the opposite and set to 2 the content will be HSC'ed BUT with preservation for real entities (eg. "&amp;" or "&#234;")
 * @param array $addConfig Configuration array send along as $conf to the internal functions
 * @return string Processed HTML content
 */
public function HTMLcleaner($content, $tags = [], $keepAll = 0, $hSC = 0, $addConfig = [])
{
    $newContent = [];
    $tokArr = explode('<', $content);
    $newContent[] = $this->bidir_htmlspecialchars(current($tokArr), $hSC);
    // We skip the first element in foreach loop
    $tokArrSliced = array_slice($tokArr, 1, null, true);
    // Index of the next entry in $newContent. Positions are remembered in $tagRegister to remove unclosed tags later.
    $c = 1;
    // Per tag name: positions in $newContent of start tags that are not closed yet (nesting control)
    $tagRegister = [];
    // Names of the open tags under 'global' nesting control, in the order they were opened
    $tagStack = [];
    $inComment = false;
    $inCdata = false;
    $skipTag = false;
    foreach ($tokArrSliced as $tok) {
        if ($inComment) {
            if (($eocPos = strpos($tok, '-->')) === false) {
                // End of comment is not found in the token. Go further until end of comment is found in other tokens.
                $newContent[$c++] = '<' . $tok;
                continue;
            }
            // Comment ends in the middle of the token: add comment and proceed with rest of the token
            $newContent[$c++] = '<' . substr($tok, 0, $eocPos + 3);
            $tok = substr($tok, $eocPos + 3);
            $inComment = false;
            $skipTag = true;
        } elseif ($inCdata) {
            if (($eocPos = strpos($tok, '/*]]>*/')) === false) {
                // End of CDATA section is not found in the token. Go further until it is found in other tokens.
                $newContent[$c++] = '<' . $tok;
                continue;
            }
            // CDATA section ends in the middle of the token: add it and proceed with rest of the token
            // NOTE(review): the end marker '/*]]>*/' is 7 bytes long but 10 bytes are consumed here, so up to
            // 3 bytes following the marker are emitted unprocessed - confirm whether this offset is intended.
            $newContent[$c++] = '<' . substr($tok, 0, $eocPos + 10);
            $tok = substr($tok, $eocPos + 10);
            $inCdata = false;
            $skipTag = true;
        } elseif (strpos($tok, '!--') === 0) {
            if (($eocPos = strpos($tok, '-->')) === false) {
                // Comment started in this token but it does not end in the same token. Set a flag to skip till the end of comment
                $newContent[$c++] = '<' . $tok;
                $inComment = true;
                continue;
            }
            // Start and end of comment are both in the current token. Add comment and proceed with rest of the token
            $newContent[$c++] = '<' . substr($tok, 0, $eocPos + 3);
            $tok = substr($tok, $eocPos + 3);
            $skipTag = true;
        } elseif (strpos($tok, '![CDATA[*/') === 0) {
            if (($eocPos = strpos($tok, '/*]]>*/')) === false) {
                // CDATA section started in this token but it does not end in the same token. Set a flag to skip till its end
                $newContent[$c++] = '<' . $tok;
                $inCdata = true;
                continue;
            }
            // Start and end of the CDATA section are both in the current token. Add it and proceed with rest of the token
            // NOTE(review): same 10 byte offset for the 7 byte end marker as above - confirm.
            $newContent[$c++] = '<' . substr($tok, 0, $eocPos + 10);
            $tok = substr($tok, $eocPos + 10);
            $skipTag = true;
        }
        $firstChar = $tok[0] ?? null;
        // It is a tag... (first char is a-z0-9 or /) (fixed 19/01 2004). This also avoids triggering on <?xml..> and <!DOCTYPE..>
        if (!$skipTag && preg_match('/[[:alnum:]\\/]/', (string)$firstChar) === 1) {
            $tagEnd = strpos($tok, '>');
            // If there is and end-bracket... tagEnd can't be 0 as the first character can't be a >
            if ($tagEnd) {
                $endTag = $firstChar === '/' ? 1 : 0;
                $tagContent = substr($tok, $endTag, $tagEnd - $endTag);
                $tagParts = preg_split('/\\s+/s', $tagContent, 2);
                $tagName = strtolower($tagParts[0]);
                $emptyTag = 0;
                if (isset($tags[$tagName])) {
                    // If there is processing to do for the tag:
                    if (is_array($tags[$tagName])) {
                        // No whitespace inside the group: a literal space in front of ")" would
                        // become part of the last alternative and "wbr" would never match.
                        if (preg_match('/^(' . self::VOID_ELEMENTS . ')$/i', $tagName)) {
                            $emptyTag = 1;
                        }
                        // If NOT an endtag, do attribute processing (added dec. 2003)
                        if (!$endTag) {
                            // Override attributes
                            if (isset($tags[$tagName]['overrideAttribs']) && (string)$tags[$tagName]['overrideAttribs'] !== '') {
                                $tagParts[1] = $tags[$tagName]['overrideAttribs'];
                            }
                            // Allowed tags
                            if (isset($tags[$tagName]['allowedAttribs']) && (string)$tags[$tagName]['allowedAttribs'] !== '') {
                                // No attribs allowed
                                if ((string)$tags[$tagName]['allowedAttribs'] === '0') {
                                    $tagParts[1] = '';
                                } elseif (isset($tagParts[1]) && trim($tagParts[1])) {
                                    $tagAttrib = $this->get_tag_attributes($tagParts[1]);
                                    $tagParts[1] = '';
                                    $newTagAttrib = [];
                                    $tList = (array)(
                                        $tags[$tagName]['_allowedAttribs']
                                        ?? GeneralUtility::trimExplode(',', strtolower($tags[$tagName]['allowedAttribs']), true)
                                    );
                                    foreach ($tList as $allowTag) {
                                        if (isset($tagAttrib[0][$allowTag])) {
                                            $newTagAttrib[$allowTag] = $tagAttrib[0][$allowTag];
                                        }
                                    }

                                    $tagParts[1] = $this->compileTagAttribs($newTagAttrib, $tagAttrib[1]);
                                }
                            }
                            // Fixed attrib values
                            if (isset($tags[$tagName]['fixAttrib']) && is_array($tags[$tagName]['fixAttrib'])) {
                                $tagAttrib = $this->get_tag_attributes($tagParts[1] ?? '');
                                $tagParts[1] = '';
                                foreach ($tags[$tagName]['fixAttrib'] as $attr => $params) {
                                    if (isset($params['set']) && $params['set'] !== '') {
                                        $tagAttrib[0][$attr] = $params['set'];
                                    }
                                    if (!empty($params['unset'])) {
                                        unset($tagAttrib[0][$attr]);
                                    }
                                    if (!empty($params['default']) && !isset($tagAttrib[0][$attr])) {
                                        $tagAttrib[0][$attr] = $params['default'];
                                    }
                                    if (!empty($params['always']) || isset($tagAttrib[0][$attr])) {
                                        // All options below are optional and the attribute itself may be missing
                                        // ('always') or may have been removed by a previous step. Every read is
                                        // guarded so that missing keys are treated as "not set" without warnings.
                                        $caseSensitive = $params['casesensitiveComp'] ?? null;
                                        if (!empty($params['trim'])) {
                                            $tagAttrib[0][$attr] = trim($tagAttrib[0][$attr] ?? '');
                                        }
                                        if (!empty($params['intval'])) {
                                            $tagAttrib[0][$attr] = (int)($tagAttrib[0][$attr] ?? 0);
                                        }
                                        if (!empty($params['lower'])) {
                                            $tagAttrib[0][$attr] = strtolower($tagAttrib[0][$attr] ?? '');
                                        }
                                        if (!empty($params['upper'])) {
                                            $tagAttrib[0][$attr] = strtoupper($tagAttrib[0][$attr] ?? '');
                                        }
                                        if (!empty($params['range'])) {
                                            if (isset($params['range'][1])) {
                                                $tagAttrib[0][$attr] = MathUtility::forceIntegerInRange($tagAttrib[0][$attr] ?? null, (int)($params['range'][0] ?? 0), (int)$params['range'][1]);
                                            } else {
                                                $tagAttrib[0][$attr] = MathUtility::forceIntegerInRange($tagAttrib[0][$attr] ?? null, (int)($params['range'][0] ?? 0));
                                            }
                                        }
                                        if (isset($params['list']) && is_array($params['list'])) {
                                            // For the class attribute, remove from the attribute value any class not in the list
                                            // Classes are case sensitive
                                            if ($attr === 'class') {
                                                $newClasses = [];
                                                $classes = GeneralUtility::trimExplode(' ', (string)($tagAttrib[0][$attr] ?? ''), true);
                                                foreach ($classes as $class) {
                                                    if (in_array($class, $params['list'])) {
                                                        $newClasses[] = $class;
                                                    }
                                                }
                                                if (!empty($newClasses)) {
                                                    $tagAttrib[0][$attr] = implode(' ', $newClasses);
                                                } else {
                                                    $tagAttrib[0][$attr] = $params['list'][0];
                                                }
                                            } else {
                                                if (!in_array($this->caseShift($tagAttrib[0][$attr] ?? null, $caseSensitive), (array)$this->caseShift($params['list'], $caseSensitive, $tagName))) {
                                                    $tagAttrib[0][$attr] = $params['list'][0];
                                                }
                                            }
                                        }
                                        $removeIfFalse = $params['removeIfFalse'] ?? false;
                                        $currentValue = $tagAttrib[0][$attr] ?? null;
                                        if (
                                            ($removeIfFalse && $removeIfFalse !== 'blank' && !$currentValue)
                                            || ($removeIfFalse === 'blank' && (string)$currentValue === '')
                                        ) {
                                            unset($tagAttrib[0][$attr]);
                                        }
                                        if ((string)($params['removeIfEquals'] ?? '') !== '' && $this->caseShift($tagAttrib[0][$attr] ?? null, $caseSensitive) === $this->caseShift($params['removeIfEquals'], $caseSensitive)) {
                                            unset($tagAttrib[0][$attr]);
                                        }
                                        if (!empty($params['prefixLocalAnchors'])) {
                                            // Only values starting with "#" are local anchors
                                            if (strpos((string)($tagAttrib[0][$attr] ?? ''), '#') === 0) {
                                                if ($params['prefixLocalAnchors'] == 2) {
                                                    /** @var ContentObjectRenderer $contentObjectRenderer */
                                                    $contentObjectRenderer = GeneralUtility::makeInstance(ContentObjectRenderer::class);
                                                    $prefix = $contentObjectRenderer->getUrlToCurrentLocation();
                                                } else {
                                                    $prefix = GeneralUtility::getIndpEnv('TYPO3_REQUEST_URL');
                                                }
                                                $tagAttrib[0][$attr] = $prefix . $tagAttrib[0][$attr];
                                            }
                                        }
                                        if (!empty($params['prefixRelPathWith'])) {
                                            // parse_url() omits components that are not present and returns false for malformed URLs
                                            $urlParts = parse_url((string)($tagAttrib[0][$attr] ?? ''));
                                            if (empty($urlParts['scheme']) && ($urlParts['path'][0] ?? '') !== '/') {
                                                // If it is NOT an absolute URL (by http: or starting "/")
                                                $tagAttrib[0][$attr] = $params['prefixRelPathWith'] . ($tagAttrib[0][$attr] ?? '');
                                            }
                                        }
                                        if (!empty($params['userFunc'])) {
                                            if (is_array($params['userFunc.'] ?? null)) {
                                                $params['userFunc.']['attributeValue'] = $tagAttrib[0][$attr] ?? null;
                                            } else {
                                                $params['userFunc.'] = $tagAttrib[0][$attr] ?? null;
                                            }
                                            $tagAttrib[0][$attr] = GeneralUtility::callUserFunction($params['userFunc'], $params['userFunc.'], $this);
                                        }
                                    }
                                }
                                $tagParts[1] = $this->compileTagAttribs($tagAttrib[0], $tagAttrib[1]);
                            }
                        } else {
                            // If endTag, remove any possible attributes:
                            $tagParts[1] = '';
                        }
                        // Protecting the tag by converting < and > to &lt; and &gt; ??
                        if (!empty($tags[$tagName]['protect'])) {
                            $lt = '&lt;';
                            $gt = '&gt;';
                        } else {
                            $lt = '<';
                            $gt = '>';
                        }
                        // Remapping tag name?
                        if (!empty($tags[$tagName]['remap'])) {
                            $tagParts[0] = $tags[$tagName]['remap'];
                        }
                        // rmTagIfNoAttrib
                        if ($endTag || empty($tags[$tagName]['rmTagIfNoAttrib']) || trim($tagParts[1] ?? '')) {
                            $setTag = true;
                            // Remove this closing tag if $tagName was among $TSconfig['removeTags']
                            if ($endTag && isset($tags[$tagName]['allowedAttribs']) && $tags[$tagName]['allowedAttribs'] === 0 && ($tags[$tagName]['rmTagIfNoAttrib'] ?? null) === 1) {
                                $setTag = false;
                            }
                            if (isset($tags[$tagName]['nesting'])) {
                                if (!isset($tagRegister[$tagName])) {
                                    $tagRegister[$tagName] = [];
                                }
                                if ($endTag) {
                                    $correctTag = true;
                                    if ($tags[$tagName]['nesting'] === 'global') {
                                        $lastEl = end($tagStack);
                                        if ($tagName !== $lastEl) {
                                            if (in_array($tagName, $tagStack, true)) {
                                                // Drop all tags that were opened after the tag being closed now
                                                while (!empty($tagStack) && $tagName !== $lastEl) {
                                                    $elPos = end($tagRegister[$lastEl]);
                                                    unset($newContent[$elPos]);
                                                    array_pop($tagRegister[$lastEl]);
                                                    array_pop($tagStack);
                                                    $lastEl = end($tagStack);
                                                }
                                            } else {
                                                // The tag being closed is not on the stack of open tags, so the end tag is discarded
                                                $correctTag = false;
                                            }
                                        }
                                    }
                                    if (empty($tagRegister[$tagName]) || !$correctTag) {
                                        $setTag = false;
                                    } else {
                                        array_pop($tagRegister[$tagName]);
                                        if ($tags[$tagName]['nesting'] === 'global') {
                                            array_pop($tagStack);
                                        }
                                    }
                                } else {
                                    $tagRegister[$tagName][] = $c;
                                    if ($tags[$tagName]['nesting'] === 'global') {
                                        $tagStack[] = $tagName;
                                    }
                                }
                            }
                            if ($setTag) {
                                // Setting the tag
                                $newContent[$c++] = $lt . ($endTag ? '/' : '') . trim($tagParts[0] . ' ' . ($tagParts[1] ?? '')) . ($emptyTag ? ' /' : '') . $gt;
                            }
                        }
                    } else {
                        // The tag is allowed without further configuration: keep it as it is
                        $newContent[$c++] = '<' . ($endTag ? '/' : '') . $tagContent . '>';
                    }
                } elseif ($keepAll) {
                    // This is if the tag was not defined in the array for processing:
                    if ($keepAll === 'protect') {
                        $lt = '&lt;';
                        $gt = '&gt;';
                    } else {
                        $lt = '<';
                        $gt = '>';
                    }
                    $newContent[$c++] = $lt . ($endTag ? '/' : '') . $tagContent . $gt;
                }
                // The content following the tag
                $newContent[$c++] = $this->bidir_htmlspecialchars(substr($tok, $tagEnd + 1), $hSC);
            } else {
                // No end-bracket found, so the "<" was not the start of a tag
                $newContent[$c++] = $this->bidir_htmlspecialchars('<' . $tok, $hSC);
            }
        } else {
            // After a comment or CDATA section the remaining token is plain content without a leading "<"
            $newContent[$c++] = $this->bidir_htmlspecialchars(($skipTag ? '' : '<') . $tok, $hSC);
            // It was not a tag anyways
            $skipTag = false;
        }
    }
    // Unsetting tags: start tags under nesting control that were never closed
    foreach ($tagRegister as $positions) {
        foreach ($positions as $pKey) {
            unset($newContent[$pKey]);
        }
    }
    $newContent = implode('', $newContent);
    $newContent = $this->stripEmptyTagsIfConfigured($newContent, $addConfig);
    return $newContent;
}
||
670 | |||
/**
 * Applies htmlspecialchars() to $value or reverts it, depending on $dir
 *
 * $dir = 1: htmlspecialchars() is applied
 * $dir = 2: htmlspecialchars() is applied, but existing entities are not encoded a second time
 * $dir = -1: htmlspecialchars_decode() is applied
 * Any other value: $value is returned unchanged
 *
 * @param string $value Input value
 * @param int $dir Direction: forth ($dir=1, dir=2 for preserving entities) AND back ($dir=-1)
 * @return string Output value
 */
public function bidir_htmlspecialchars($value, $dir)
{
    $direction = (int)$dir;
    if ($direction === 1) {
        return htmlspecialchars($value);
    }
    if ($direction === 2) {
        return htmlspecialchars($value, ENT_COMPAT, 'UTF-8', false);
    }
    if ($direction === -1) {
        return htmlspecialchars_decode($value);
    }
    return $value;
}
||
691 | |||
692 | /** |
||
693 | * Prefixes the relative paths of hrefs/src/action in the tags [embed,td,table,body,img,input,form,link,script,a,param] in the $content with the $main_prefix or an alternative given by $alternatives |
||
694 | * |
||
695 | * @param string $main_prefix Prefix string |
||
696 | * @param string $content HTML content |
||
697 | * @param array $alternatives Array with alternative prefixes for certain of the tags. key=>value pairs where the keys are the tag element names in uppercase |
||
698 | * @param string $suffix Suffix string (put after the resource). |
||
699 | * @return string Processed HTML content |
||
700 | */ |
||
701 | public function prefixResourcePath($main_prefix, $content, $alternatives = [], $suffix = '') |
||
702 | { |
||
703 | $parts = $this->splitTags('embed,td,table,body,img,input,form,link,script,a,param', $content); |
||
704 | foreach ($parts as $k => $v) { |
||
705 | if ($k % 2) { |
||
706 | $params = $this->get_tag_attributes($v); |
||
707 | // Detect tag-ending so that it is re-applied correctly. |
||
708 | $tagEnd = substr($v, -2) === '/>' ? ' />' : '>'; |
||
709 | // The 'name' of the first tag |
||
710 | $firstTagName = $this->getFirstTagName($v); |
||
711 | $somethingDone = 0; |
||
712 | $prefix = $alternatives[strtoupper($firstTagName)] ?? $main_prefix; |
||
713 | switch (strtolower($firstTagName)) { |
||
714 | case 'td': |
||
715 | |||
716 | case 'body': |
||
717 | |||
718 | case 'table': |
||
719 | $src = $params[0]['background']; |
||
720 | if ($src) { |
||
721 | $params[0]['background'] = $this->prefixRelPath($prefix, $params[0]['background'], $suffix); |
||
722 | $somethingDone = 1; |
||
723 | } |
||
724 | break; |
||
725 | case 'img': |
||
726 | |||
727 | case 'input': |
||
728 | |||
729 | case 'script': |
||
730 | |||
731 | case 'embed': |
||
732 | $src = $params[0]['src']; |
||
733 | if ($src) { |
||
734 | $params[0]['src'] = $this->prefixRelPath($prefix, $params[0]['src'], $suffix); |
||
735 | $somethingDone = 1; |
||
736 | } |
||
737 | break; |
||
738 | case 'link': |
||
739 | |||
740 | case 'a': |
||
741 | $src = $params[0]['href']; |
||
742 | if ($src) { |
||
743 | $params[0]['href'] = $this->prefixRelPath($prefix, $params[0]['href'], $suffix); |
||
744 | $somethingDone = 1; |
||
745 | } |
||
746 | break; |
||
747 | case 'form': |
||
748 | $src = $params[0]['action']; |
||
749 | if ($src) { |
||
750 | $params[0]['action'] = $this->prefixRelPath($prefix, $params[0]['action'], $suffix); |
||
751 | $somethingDone = 1; |
||
752 | } |
||
753 | break; |
||
754 | case 'param': |
||
755 | $test = $params[0]['name']; |
||
756 | if ($test && $test === 'movie') { |
||
757 | if ($params[0]['value']) { |
||
758 | $params[0]['value'] = $this->prefixRelPath($prefix, $params[0]['value'], $suffix); |
||
759 | $somethingDone = 1; |
||
760 | } |
||
761 | } |
||
762 | break; |
||
763 | } |
||
764 | if ($somethingDone) { |
||
765 | $tagParts = preg_split('/\\s+/s', $v, 2); |
||
766 | $tagParts[1] = $this->compileTagAttribs($params[0], $params[1]); |
||
767 | $parts[$k] = '<' . trim(strtolower($firstTagName) . ' ' . $tagParts[1]) . $tagEnd; |
||
768 | } |
||
769 | } |
||
770 | } |
||
771 | $content = implode('', $parts); |
||
772 | // Fix <style> section: |
||
773 | $prefix = $alternatives['style'] ?? $main_prefix; |
||
774 | if ((string)$prefix !== '') { |
||
775 | $parts = $this->splitIntoBlock('style', $content); |
||
776 | foreach ($parts as $k => &$part) { |
||
777 | if ($k % 2) { |
||
778 | $part = preg_replace('/(url[[:space:]]*\\([[:space:]]*["\']?)([^"\')]*)(["\']?[[:space:]]*\\))/i', '\\1' . $prefix . '\\2' . $suffix . '\\3', $part); |
||
779 | } |
||
780 | } |
||
781 | unset($part); |
||
782 | $content = implode('', $parts); |
||
783 | } |
||
784 | return $content; |
||
785 | } |
||
786 | |||
/**
 * Internal sub-function for ->prefixResourcePath()
 *
 * Puts $prefix in front of and $suffix behind $srcVal, unless $srcVal
 *  - starts with "/" (absolute path) or "#" (link to a section within the page), or
 *  - contains a URL scheme (e.g. "https:", "mailto:").
 *
 * @param string $prefix Prefix string
 * @param string $srcVal Relative path/URL
 * @param string $suffix Suffix string
 * @return string Output path, prefixed if no scheme in input string
 * @internal
 */
public function prefixRelPath($prefix, $srcVal, $suffix = '')
{
    $srcVal = (string)$srcVal;
    // "?? ''" keeps an empty input from triggering an "uninitialized string offset" warning.
    $firstCharacter = $srcVal[0] ?? '';
    if ($firstCharacter === '/' || $firstCharacter === '#') {
        return $srcVal;
    }
    $urlParts = parse_url($srcVal);
    // parse_url() leaves out the "scheme" key for relative URLs and returns false for
    // seriously malformed ones; empty() treats both as "no scheme" without raising a warning.
    if (empty($urlParts['scheme'])) {
        $srcVal = $prefix . $srcVal . $suffix;
    }
    return $srcVal;
}
||
809 | |||
/**
 * Internal function that upper-cases a string, or every value of an array,
 * unless a case sensitive comparison is requested.
 *
 * @param mixed $str Input string/array
 * @param bool $caseSensitiveComparison If TRUE the input is returned untouched, if FALSE it is returned in uppercase
 * @param string $cacheKey Key string used for internal caching of array results. Could be an MD5 hash of the serialized version of the input $str if that is an array. A cached entry for the key is returned without looking at $str again.
 * @return array|string Output string, processed
 * @internal
 */
public function caseShift($str, $caseSensitiveComparison, $cacheKey = '')
{
    if ($caseSensitiveComparison) {
        return $str;
    }
    if (!is_array($str)) {
        return strtoupper($str);
    }
    // Serve arrays from the runlevel cache when a key was given and is known.
    if ($cacheKey && isset($this->caseShift_cache[$cacheKey])) {
        return $this->caseShift_cache[$cacheKey];
    }
    // array_map() with a single array keeps the original keys.
    $upperCased = array_map(function ($item) {
        return strtoupper($item);
    }, $str);
    if ($cacheKey) {
        $this->caseShift_cache[$cacheKey] = $upperCased;
    }
    return $upperCased;
}
||
841 | |||
/**
 * Builds the attribute string of a tag from an array of attributes.
 *
 * An attribute is written as name=value when its value is not an empty string or when
 * the meta information carries a 'dashType' for it; otherwise only the name is written.
 * The quote character is taken from the meta 'dashType'; without it, values that can be
 * interpreted as integer stay unquoted and all others get double quotes.
 * The attribute name is taken from the meta 'origTag' if present, else from the array key.
 *
 * @param array $tagAttrib Tag attributes
 * @param array $meta Meta information about these attributes (like if they were quoted)
 * @return string Imploded attributes, eg: 'attribute="value" attrib2="value2"'
 * @internal
 */
public function compileTagAttribs($tagAttrib, $meta = [])
{
    $compiled = [];
    foreach ($tagAttrib as $name => $value) {
        $attribute = $meta[$name]['origTag'] ?? $name;
        $writeValue = strcmp($value, '') || isset($meta[$name]['dashType']);
        if ($writeValue) {
            if (isset($meta[$name]['dashType'])) {
                $quote = $meta[$name]['dashType'];
            } else {
                $quote = MathUtility::canBeInterpretedAsInteger($value) ? '' : '"';
            }
            $attribute = $attribute . '=' . $quote . $value . $quote;
        }
        $compiled[] = $attribute;
    }
    return implode(' ', $compiled);
}
||
863 | |||
/**
 * Converts TSconfig into the configuration arrays for the HTMLcleaner function.
 *
 * Evaluated TSconfig keys: allowTags, tags., localNesting, globalNesting, rmTagIfNoAttrib,
 * noAttrib, removeTags, stripEmptyTags, stripEmptyTags., keepNonMatchedTags, htmlSpecialChars.
 *
 * @param array $TSconfig TSconfig for HTMLcleaner
 * @param array $keepTags Base tag configuration (tag name => configuration). Entries given here take precedence over the tags listed in "allowTags".
 * @return array Four elements: [0] tag configuration array, [1] the "keepNonMatchedTags" setting as string, [2] the "htmlSpecialChars" setting as integer, [3] additional configuration (the "stripEmptyTags" settings, if set)
 * @internal
 */
public function HTMLparserConfig($TSconfig, $keepTags = [])
{
    // Allow tags (base list, merged with incoming array)
    $alTags = array_flip(GeneralUtility::trimExplode(',', strtolower($TSconfig['allowTags'] ?? ''), true));
    $keepTags = array_merge($alTags, $keepTags);
    // Set config properties.
    if (isset($TSconfig['tags.']) && is_array($TSconfig['tags.'])) {
        // First pass: plain "tag = 0|1" switches. Keys containing uppercase characters are ignored.
        foreach ($TSconfig['tags.'] as $key => $tagC) {
            if (is_array($tagC) || (string)$key !== strtolower((string)$key)) {
                continue;
            }
            if ((string)$tagC === '0') {
                unset($keepTags[$key]);
            }
            if ((string)$tagC === '1' && !isset($keepTags[$key])) {
                $keepTags[$key] = 1;
            }
        }
        // Second pass: "tag." sub-arrays with detailed configuration.
        foreach ($TSconfig['tags.'] as $key => $tagC) {
            if (!is_array($tagC) || (string)$key !== strtolower((string)$key)) {
                continue;
            }
            // Strip the trailing dot of the TSconfig key to get the tag name.
            $key = substr((string)$key, 0, -1);
            // The tag may be unknown so far or configured with a scalar only.
            if (!is_array($keepTags[$key] ?? null)) {
                $keepTags[$key] = [];
            }
            if (isset($tagC['fixAttrib.']) && is_array($tagC['fixAttrib.'])) {
                foreach ($tagC['fixAttrib.'] as $atName => $atConfig) {
                    if (!is_array($atConfig)) {
                        continue;
                    }
                    $atName = substr((string)$atName, 0, -1);
                    $existingConfig = $keepTags[$key]['fixAttrib'][$atName] ?? null;
                    if (!is_array($existingConfig)) {
                        $existingConfig = [];
                    }
                    $mergedConfig = array_merge($existingConfig, $atConfig);
                    // "range" and "list" are given as comma separated strings and used as arrays.
                    foreach (['range', 'list'] as $listProperty) {
                        if ((string)($mergedConfig[$listProperty] ?? '') !== '') {
                            $mergedConfig[$listProperty] = GeneralUtility::trimExplode(',', $mergedConfig[$listProperty]);
                        }
                    }
                    $keepTags[$key]['fixAttrib'][$atName] = $mergedConfig;
                }
            }
            unset($tagC['fixAttrib.'], $tagC['fixAttrib']);
            // Removing a tag without attributes requires nesting to be evaluated.
            if (!empty($tagC['rmTagIfNoAttrib']) && empty($tagC['nesting'])) {
                $tagC['nesting'] = 1;
            }
            $keepTags[$key] = array_merge($keepTags[$key], $tagC);
        }
    }
    // Returns the lowercased tag names of a comma separated TSconfig option, or [] if it is not set.
    $tagsListedIn = static function ($option) use ($TSconfig) {
        if (empty($TSconfig[$option])) {
            return [];
        }
        return GeneralUtility::trimExplode(',', strtolower($TSconfig[$option]), true);
    };
    // List options which only affect tags that are already part of $keepTags.
    // The order matters: globalNesting overrules localNesting, and rmTagIfNoAttrib
    // only sets a nesting if none of the previous options did.
    foreach (['localNesting', 'globalNesting', 'rmTagIfNoAttrib', 'noAttrib'] as $option) {
        foreach ($tagsListedIn($option) as $tn) {
            if (!isset($keepTags[$tn])) {
                continue;
            }
            if (!is_array($keepTags[$tn])) {
                $keepTags[$tn] = [];
            }
            switch ($option) {
                case 'localNesting':
                    $keepTags[$tn]['nesting'] = 1;
                    break;
                case 'globalNesting':
                    $keepTags[$tn]['nesting'] = 'global';
                    break;
                case 'rmTagIfNoAttrib':
                    $keepTags[$tn]['rmTagIfNoAttrib'] = 1;
                    if (empty($keepTags[$tn]['nesting'])) {
                        $keepTags[$tn]['nesting'] = 1;
                    }
                    break;
                case 'noAttrib':
                    $keepTags[$tn]['allowedAttribs'] = 0;
                    break;
            }
        }
    }
    // removeTags replaces any existing configuration of the tag, known or not.
    foreach ($tagsListedIn('removeTags') as $tn) {
        $keepTags[$tn] = [
            'allowedAttribs' => 0,
            'rmTagIfNoAttrib' => 1,
        ];
    }
    // Create additional configuration:
    $addConfig = [];
    if (isset($TSconfig['stripEmptyTags'])) {
        $addConfig['stripEmptyTags'] = $TSconfig['stripEmptyTags'];
        if (isset($TSconfig['stripEmptyTags.'])) {
            $addConfig['stripEmptyTags.'] = $TSconfig['stripEmptyTags.'];
        }
    }
    return [
        $keepTags,
        '' . ($TSconfig['keepNonMatchedTags'] ?? ''),
        (int)($TSconfig['htmlSpecialChars'] ?? 0),
        $addConfig
    ];
}
991 | |||
/**
 * Strips empty tags from HTML.
 *
 * A tag counts as empty when nothing but spaces (and, if enabled, non-breaking spaces)
 * stands between its opening and closing tag. The replacement is repeated until nothing
 * is left to strip, so tags which become empty by removing their children are stripped too.
 *
 * @param string $content The content to be stripped of empty tags
 * @param string $tagList The comma separated list of tags to be stripped.
 *                        If empty, all empty tags will be stripped.
 *                        The tag names are inserted into a regular expression unescaped.
 * @param bool $treatNonBreakingSpaceAsEmpty If TRUE tags containing only non-breaking spaces (the &nbsp; entity or the UTF-8 encoded U+00A0 character) and spaces will be treated as empty.
 * @param bool $keepTags If true, the provided tags will be kept instead of stripped.
 * @return string the stripped content
 */
public function stripEmptyTags($content, $tagList = '', $treatNonBreakingSpaceAsEmpty = false, $keepTags = false)
{
    if (!empty($tagList)) {
        $tagRegEx = implode('|', GeneralUtility::trimExplode(',', $tagList, true));
        if ($keepTags) {
            // Negative lookahead: match any tag name except the listed ones.
            $tagRegEx = '(?!' . $tagRegEx . ')[^ >]+';
        }
    } else {
        $tagRegEx = '[^ >]+'; // all characters until you reach a > or space;
    }
    // Additional alternative next to the plain space of the pattern below. It has to name
    // the entity (and the raw character as bytes, the pattern is not in UTF-8 mode);
    // a plain space here would make the flag a no-op.
    $nbspRegex = $treatNonBreakingSpaceAsEmpty ? '|(&nbsp;|\xC2\xA0)' : '';
    $finalRegex = sprintf('/<(%s)[^>]*>( %s)*<\/\\1[^>]*>/i', $tagRegEx, $nbspRegex);
    $count = 1;
    while ($count !== 0) {
        // preg_replace() returns null on error; keep the content of the last successful run then.
        $content = preg_replace($finalRegex, '', $content, -1, $count) ?? $content;
    }
    return $content;
}
||
1020 | |||
1021 | /** |
||
1022 | * Strips the configured empty tags from the HTML code. |
||
1023 | * |
||
1024 | * @param string $value |
||
1025 | * @param array $configuration |
||
1026 | * @return string |
||
1027 | */ |
||
1028 | protected function stripEmptyTagsIfConfigured($value, $configuration) |
||
1046 | } |
||
1047 | } |
||
1048 |