| Total Complexity | 224 | 
| Total Lines | 1011 | 
| Duplicated Lines | 0 % | 
| Changes | 0 | ||
Complex classes like HtmlParser often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use HtmlParser, and based on these observations, apply Extract Interface, too.
| 1 | <?php | ||
| 25 | class HtmlParser | ||
| 26 | { | ||
| 27 | /** | ||
| 28 | * @var array | ||
| 29 | */ | ||
| 30 | protected $caseShift_cache = []; | ||
| 31 | |||
| 32 | // Void elements that do not have closing tags, as defined by HTML5, except link element | ||
| 33 | const VOID_ELEMENTS = 'area|base|br|col|command|embed|hr|img|input|keygen|meta|param|source|track|wbr'; | ||
| 34 | |||
| 35 | /************************************ | ||
| 36 | * | ||
| 37 | * Parsing HTML code | ||
| 38 | * | ||
| 39 | ************************************/ | ||
/**
 * Splits $content into blocks delimited by the tags listed in $tag.
 *
 * The result alternates between content outside the blocks (even indexes) and complete
 * blocks including their own start and end tags (odd indexes). Nested occurrences of the
 * listed tags stay inside the outermost block.
 * Use ->removeFirstAndLastTag() to process the content if needed.
 *
 * If the tag list is empty, or the content cannot be split because the regular expression
 * engine reports an error, the whole content is returned as a single "outside" part.
 *
 * @param string $tag List of tags, comma separated.
 * @param string $content HTML-content
 * @param bool $eliminateExtraEndTags If set, excessive end tags are ignored - you should probably set this in most cases.
 * @return array Even numbers in the array are outside the blocks, Odd numbers are block-content.
 * @see splitTags(), removeFirstAndLastTag()
 */
public function splitIntoBlock($tag, $content, $eliminateExtraEndTags = false)
{
    $tags = array_unique(GeneralUtility::trimExplode(',', $tag, true));
    if (empty($tags)) {
        // An empty alternation group would make the pattern match "<>" or "< foo>"
        return [$content];
    }
    $quotedTags = array_map(function ($tagName) {
        return preg_quote($tagName, '/');
    }, $tags);
    $regexStr = '/\\<\\/?(' . implode('|', $quotedTags) . ')(\\s*\\>|\\s[^\\>]*\\>)/si';
    $parts = preg_split($regexStr, $content);
    if ($parts === false) {
        // PCRE error (e.g. backtrack limit reached): keep the content instead of losing it
        return [$content];
    }
    $newParts = [];
    $buffer = $parts[0];
    // $pointer always addresses the "<" of the tag that separated the previous part from $v
    $pointer = strlen($buffer);
    $nested = 0;
    foreach (array_slice($parts, 1) as $v) {
        $isEndTag = substr($content, $pointer, 2) === '</';
        // Length of the tag at $pointer, including the closing ">"
        $tagLen = strcspn($content, '>', $pointer) + 1;
        if (!$isEndTag) {
            // We meet a start-tag. On ground level the buffered outside content is stored first.
            if (!$nested) {
                $newParts[] = $buffer;
                $buffer = '';
            }
            // We are inside now!
            $nested++;
            // The tag and the content following it go to the buffer, pointer is increased
            $mbuffer = substr($content, $pointer, strlen($v) + $tagLen);
            $pointer += strlen($mbuffer);
            $buffer .= $mbuffer;
        } else {
            // We meet an end-tag: decrease nested-level
            $nested--;
            $eliminated = 0;
            if ($eliminateExtraEndTags && $nested < 0) {
                // Excessive end tag: drop it and stay on ground level
                $nested = 0;
                $eliminated = 1;
            } else {
                // Otherwise the end tag belongs to the current buffer
                $buffer .= substr($content, $pointer, $tagLen);
            }
            $pointer += $tagLen;
            // If we are back on ground level (and not by eliminating a tag) the block is complete
            if (!$nested && !$eliminated) {
                $newParts[] = $buffer;
                $buffer = '';
            }
            // The content following the end tag goes to the buffer, pointer is increased
            $mbuffer = substr($content, $pointer, strlen($v));
            $pointer += strlen($mbuffer);
            $buffer .= $mbuffer;
        }
    }
    $newParts[] = $buffer;
    return $newParts;
}
| 110 | |||
| 111 | /** | ||
| 112 | * Splitting content into blocks *recursively* and processing tags/content with call back functions. | ||
| 113 | * | ||
| 114 | * @param string $tag Tag list, see splitIntoBlock() | ||
| 115 | * @param string $content Content, see splitIntoBlock() | ||
| 116 | * @param object $procObj Object where call back methods are. | ||
| 117 | * @param string $callBackContent Name of call back method for content; "function callBackContent($str,$level) | ||
| 118 | * @param string $callBackTags Name of call back method for tags; "function callBackTags($tags,$level) | ||
| 119 | * @param int $level Indent level | ||
| 120 | * @return string Processed content | ||
| 121 | * @see splitIntoBlock() | ||
| 122 | */ | ||
| 123 | public function splitIntoBlockRecursiveProc($tag, $content, &$procObj, $callBackContent, $callBackTags, $level = 0) | ||
| 145 | } | ||
| 146 | |||
/**
 * Splits $content at every opening (or self-closing) tag whose name is in the list $tag.
 *
 * The result alternates between content (even indexes) and the matched tags themselves
 * (odd indexes). End tags are not matched and therefore remain part of the content.
 *
 * If the tag list is empty, or the content cannot be split because the regular expression
 * engine reports an error, the whole content is returned as a single content part.
 *
 * @param string $tag List of tags, comma separated.
 * @param string $content HTML-content
 * @return array Even numbers in the array are content outside the tags, Odd numbers are the tags.
 * @see splitIntoBlock(), removeFirstAndLastTag()
 */
public function splitTags($tag, $content)
{
    $tags = GeneralUtility::trimExplode(',', $tag, true);
    if (empty($tags)) {
        // An empty alternation group would make the pattern match "<>" or "< foo>"
        return [$content];
    }
    $quotedTags = array_map(function ($tagName) {
        return preg_quote($tagName, '/');
    }, $tags);
    $regexStr = '/\\<(' . implode('|', $quotedTags) . ')(\\s[^>]*)?\\/?>/si';
    $parts = preg_split($regexStr, $content);
    if ($parts === false) {
        // PCRE error (e.g. backtrack limit reached): keep the content instead of losing it
        return [$content];
    }
    $newParts = [$parts[0]];
    // $pointer always addresses the "<" of the tag that precedes the next part
    $pointer = strlen($parts[0]);
    foreach (array_slice($parts, 1) as $v) {
        // Length of the tag at $pointer, including the closing ">"
        $tagLen = strcspn($content, '>', $pointer) + 1;
        // Set tag:
        $matchedTag = substr($content, $pointer, $tagLen);
        $newParts[] = $matchedTag;
        $pointer += strlen($matchedTag);
        // Set content:
        $newParts[] = $v;
        $pointer += strlen($v);
    }
    return $newParts;
}
| 184 | |||
/**
 * Removes the first and last tag in the string
 * Anything before the first and after the last tags respectively is also removed
 *
 * The first tag ends at the first ">" and the last tag starts at the last "<". If either
 * bracket is missing, or the last "<" is located before the first ">" (e.g. the string
 * holds a single tag only), there is nothing enclosed and an empty string is returned.
 *
 * @param string $str String to process
 * @return string Content between the first and the last tag
 */
public function removeFirstAndLastTag($str)
{
    // End of first tag:
    $start = strpos($str, '>');
    // Begin of last tag:
    $end = strrpos($str, '<');
    if ($start === false || $end === false || $end < $start) {
        // No pair of enclosing tags; never do arithmetic on a FALSE position
        return '';
    }
    // The cast guarantees a string where substr() may still return FALSE (PHP < 8)
    return (string)substr($str, $start + 1, $end - $start - 1);
}
| 201 | |||
/**
 * Returns the leading part of $str up to and including the first ">" character.
 * No check is made that this part really is a tag, so the caller has to make sure
 * the string starts with one.
 *
 * @param string $str HTML string with tags
 * @return string Everything up to and including the first ">", or an empty string if there is no ">"
 */
public function getFirstTag($str)
{
    // Part of the string in front of the first closing bracket
    $beforeBracket = strstr($str, '>', true);
    if ($beforeBracket === false) {
        return '';
    }
    return $beforeBracket . '>';
}
| 215 | |||
/**
 * Returns the NAME of the first tag in $str
 *
 * The tag has to be the first thing in the string (leading whitespace is allowed) and
 * its name has to be followed by whitespace or ">". The slash of a self-closing tag
 * written without whitespace ("<br/>") is not part of the name.
 *
 * @param string $str HTML tag
 * @param bool $preserveCase If set, then the tag is NOT converted to uppercase but case is preserved.
 * @return string Tag name, in upper case unless $preserveCase is set. Empty string if no tag was found.
 * @see getFirstTag()
 */
public function getFirstTagName($str, $preserveCase = false)
{
    $matches = [];
    if (preg_match('/^\\s*\\<([^\\s\\>]+)(\\s|\\>)/', $str, $matches) !== 1) {
        return '';
    }
    // "<br/>" is captured as "br/": the trailing slash marks self-closing, it is not the name.
    // A leading slash (end tag) is deliberately kept.
    $tagName = rtrim($matches[1], '/');
    return $preserveCase ? $tagName : strtoupper($tagName);
}
| 235 | |||
/**
 * Returns an array with all attributes as keys. Attribute names are lower-cased and reduced to
 * alphanumeric characters, "_", ":" and "-".
 * If an attribute is empty (shorthand), then the value for the key is empty. You can check if it existed with isset()
 *
 * Compared to the method in GeneralUtility::get_tag_attributes this method also returns meta data about each
 * attribute: 'origTag' holds the attribute name in its original case and 'dashType' holds the quotation
 * character of the value as reported by split_tag_attributes() (empty for unquoted values).
 *
 * @param string $tag Tag: $tag is either a whole tag (eg '<TAG OPTION ATTRIB=VALUE>') or the parameterlist (ex ' OPTION ATTRIB=VALUE>')
 * @param bool $deHSC If set, the attribute values are de-htmlspecialchar'ed. Should actually always be set!
 * @return array array(Tag attributes,Attribute meta-data)
 */
public function get_tag_attributes($tag, $deHSC = false)
{
    list($components, $metaC) = $this->split_tag_attributes($tag);
    // Name of the attribute that is waiting for its value
    $name = '';
    $valuemode = false;
    $attributes = [];
    $attributesMeta = [];
    if (!is_array($components)) {
        // Always hand back the documented array structure
        return [$attributes, $attributesMeta];
    }
    foreach ($components as $key => $val) {
        // Only an UNQUOTED "=" is the separator; a quoted value '"="' arrives here as "=" as well,
        // but carries its quotation character in the meta information
        $isSeparator = $val === '=' && (!isset($metaC[$key]) || $metaC[$key] === '');
        if ($isSeparator) {
            $valuemode = true;
            continue;
        }
        if ($valuemode) {
            // A value is only assigned if an attribute name is waiting for it
            if ($name !== '') {
                $attributes[$name] = $deHSC ? htmlspecialchars_decode($val) : $val;
                $attributesMeta[$name]['dashType'] = $metaC[$key];
                $name = '';
            }
        } else {
            $namekey = preg_replace('/[^[:alnum:]_\\:\\-]/', '', $val);
            // Strict comparison: an attribute named "0" is a name, too
            if ($namekey !== null && $namekey !== '') {
                $name = strtolower($namekey);
                $attributesMeta[$name] = [];
                $attributesMeta[$name]['origTag'] = $namekey;
                $attributes[$name] = '';
            }
        }
        $valuemode = false;
    }
    return [$attributes, $attributesMeta];
}
| 282 | |||
/**
 * Returns an array with the 'components' from an attribute list.
 * The result is normally analyzed by get_tag_attributes
 * Removes tag-name if found, also for tags without any attribute (eg '<br>').
 *
 * The difference between this method and the one in GeneralUtility is that this method actually determines
 * more information on the attribute, e.g. if the value is enclosed by a " or ' character.
 * That's why this method returns two arrays, the "components" and the "meta-information" of the "components".
 *
 * The components are attribute names, "=" separators and values (with enclosing quotes removed) in the
 * order they appear. The meta-information has one entry per component: the quote character that enclosed
 * the component, or an empty string if it was not quoted.
 *
 * @param string $tag The tag or attributes
 * @return array array(components, meta-information)
 * @access private
 * @see \TYPO3\CMS\Core\Utility\GeneralUtility::split_tag_attributes()
 */
public function split_tag_attributes($tag)
{
    $matches = [];
    // Group 1: optional leading "<tagname", terminated by whitespace, by the closing ">" or by the end
    // of the string. Group 2: the attribute list without trailing whitespace and closing ">".
    if (preg_match('/(\\<[^\\s\\>]+(?:\\s+|(?=\\>)|$))?(.*?)\\s*(\\>)?$/s', $tag, $matches) !== 1) {
        return [[], []];
    }
    $tag_tmp = $matches[2];
    $metaValue = [];
    $value = [];
    $matches = [];
    // A component is a double quoted string, a single quoted string, an unquoted word or a "="
    if (preg_match_all('/("[^"]*"|\'[^\']*\'|[^\\s"\'\\=]+|\\=)/s', $tag_tmp, $matches) > 0) {
        foreach ($matches[1] as $part) {
            $firstChar = $part[0];
            if ($firstChar === '"' || $firstChar === '\'') {
                // Quoted: remember the quote character and strip the quotes from the value
                $metaValue[] = $firstChar;
                $value[] = substr($part, 1, -1);
            } else {
                $metaValue[] = '';
                $value[] = $part;
            }
        }
    }
    return [$value, $metaValue];
}
| 321 | |||
| 322 | /********************************* | ||
| 323 | * | ||
| 324 | * Clean HTML code | ||
| 325 | * | ||
| 326 | *********************************/ | ||
| 327 | /** | ||
| 328 | * Function that can clean up HTML content according to configuration given in the $tags array. | ||
| 329 | * | ||
| 330 |      * Initializing the $tags array to allow a list of tags (in this case <B>,<I>,<U> and <A>), set it like this:		 $tags = array_flip(explode(',','b,a,i,u')) | ||
| 331 | * If the value of the $tags[$tagname] entry is an array, advanced processing of the tags is initialized. These are the options: | ||
| 332 | * | ||
| 333 | * $tags[$tagname] = Array( | ||
| 334 | * 'overrideAttribs' => '' If set, this string is preset as the attributes of the tag | ||
| 335 | * 'allowedAttribs' => '0' (zero) = no attributes allowed, '[commalist of attributes]' = only allowed attributes. If blank, all attributes are allowed. | ||
| 336 | * 'fixAttrib' => Array( | ||
| 337 | * '[attribute name]' => Array ( | ||
| 338 | * 'set' => Force the attribute value to this value. | ||
| 339 | * 'unset' => Boolean: If set, the attribute is unset. | ||
| 340 | * 'default' => If no attribute exists by this name, this value is set as default value (if this value is not blank) | ||
| 341 | * 'always' => Boolean. If set, the attribute is always processed. Normally an attribute is processed only if it exists | ||
| 342 | * 'trim,intval,lower,upper' => All booleans. If any of these keys are set, the value is passed through the respective PHP-functions. | ||
| 343 |      * 'range' => Array ('[low limit]','[high limit, optional]')		Setting integer range. | ||
| 344 |      * 'list' => Array ('[value1/default]','[value2]','[value3]')		Attribute must be in this list. If not, the value is set to the first element. | ||
| 345 | * 'removeIfFalse' => Boolean/'blank'. If set, then the attribute is removed if it is 'FALSE'. If this value is set to 'blank' then the value must be a blank string (that means a 'zero' value will not be removed) | ||
| 346 | * 'removeIfEquals' => [value] If the attribute value matches the value set here, then it is removed. | ||
| 347 | * 'casesensitiveComp' => 1 If set, then the removeIfEquals and list comparisons will be case sensitive. Otherwise not. | ||
| 348 | * ) | ||
| 349 | * ), | ||
| 350 | * 'protect' => '', Boolean. If set, the tag <> is converted to &lt; and &gt; | ||
| 351 | * 'remap' => '', String. If set, the tagname is remapped to this tagname | ||
| 352 | * 'rmTagIfNoAttrib' => '', Boolean. If set, then the tag is removed if no attributes happened to be there. | ||
| 353 | * 'nesting' => '', Boolean/'global'. If set TRUE, then this tag must have starting and ending tags in the correct order. Any tags not in this order will be discarded. Thus '</B><B><I></B></I></B>' will be converted to '<B><I></B></I>'. Is the value 'global' then true nesting in relation to other tags marked for 'global' nesting control is preserved. This means that if <B> and <I> are set for global nesting then this string '</B><B><I></B></I></B>' is converted to '<B></B>' | ||
| 354 | * ) | ||
| 355 | * | ||
| 356 | * @param string $content Is the HTML-content being processed. This is also the result being returned. | ||
| 357 | * @param array $tags Is an array where each key is a tagname in lowercase. Only tags present as keys in this array are preserved. The value of the key can be an array with a vast number of options to configure. | ||
| 358 | * @param mixed $keepAll Boolean/'protect', if set, then all tags are kept regardless of tags present as keys in $tags-array. If 'protect' then the preserved tags have their <> converted to &lt; and &gt; | ||
| 359 | * @param int $hSC Values -1,0,1,2: Set to zero= disabled, set to 1 then the content BETWEEN tags is htmlspecialchar()'ed, set to -1 it is the opposite and set to 2 the content will be HSC'ed BUT with preservation for real entities (eg. "&amp;" or "&ecirc;") | ||
| 360 | * @param array $addConfig Configuration array send along as $conf to the internal functions | ||
| 361 | * @return string Processed HTML content | ||
| 362 | */ | ||
| 363 | public function HTMLcleaner($content, $tags = [], $keepAll = 0, $hSC = 0, $addConfig = []) | ||
| 364 |     { | ||
| 365 | $newContent = []; | ||
| 366 |         $tokArr = explode('<', $content); | ||
| 367 | $newContent[] = $this->bidir_htmlspecialchars(current($tokArr), $hSC); | ||
| 368 | // We skip the first element in foreach loop | ||
| 369 | $tokArrSliced = array_slice($tokArr, 1, null, true); | ||
| 370 | $c = 1; | ||
| 371 | $tagRegister = []; | ||
| 372 | $tagStack = []; | ||
| 373 | $inComment = false; | ||
| 374 | $inCdata = false; | ||
| 375 | $skipTag = false; | ||
| 376 |         foreach ($tokArrSliced as $tok) { | ||
| 377 |             if ($inComment) { | ||
| 378 |                 if (($eocPos = strpos($tok, '-->')) === false) { | ||
| 379 | // End of comment is not found in the token. Go further until end of comment is found in other tokens. | ||
| 380 | $newContent[$c++] = '<' . $tok; | ||
| 381 | continue; | ||
| 382 | } | ||
| 383 | // Comment ends in the middle of the token: add comment and proceed with rest of the token | ||
| 384 | $newContent[$c++] = '<' . substr($tok, 0, ($eocPos + 3)); | ||
| 385 | $tok = substr($tok, $eocPos + 3); | ||
| 386 | $inComment = false; | ||
| 387 | $skipTag = true; | ||
| 388 |             } elseif ($inCdata) { | ||
| 389 |                 if (($eocPos = strpos($tok, '/*]]>*/')) === false) { | ||
| 390 | // End of comment is not found in the token. Go further until end of comment is found in other tokens. | ||
| 391 | $newContent[$c++] = '<' . $tok; | ||
| 392 | continue; | ||
| 393 | } | ||
| 394 | // Comment ends in the middle of the token: add comment and proceed with rest of the token | ||
| 395 | $newContent[$c++] = '<' . substr($tok, 0, $eocPos + 10); | ||
| 396 | $tok = substr($tok, $eocPos + 10); | ||
| 397 | $inCdata = false; | ||
| 398 | $skipTag = true; | ||
| 399 |             } elseif (substr($tok, 0, 3) === '!--') { | ||
| 400 |                 if (($eocPos = strpos($tok, '-->')) === false) { | ||
| 401 | // Comment started in this token but it does end in the same token. Set a flag to skip till the end of comment | ||
| 402 | $newContent[$c++] = '<' . $tok; | ||
| 403 | $inComment = true; | ||
| 404 | continue; | ||
| 405 | } | ||
| 406 | // Start and end of comment are both in the current token. Add comment and proceed with rest of the token | ||
| 407 | $newContent[$c++] = '<' . substr($tok, 0, ($eocPos + 3)); | ||
| 408 | $tok = substr($tok, $eocPos + 3); | ||
| 409 | $skipTag = true; | ||
| 410 |             } elseif (substr($tok, 0, 10) === '![CDATA[*/') { | ||
| 411 |                 if (($eocPos = strpos($tok, '/*]]>*/')) === false) { | ||
| 412 | // Comment started in this token but it does end in the same token. Set a flag to skip till the end of comment | ||
| 413 | $newContent[$c++] = '<' . $tok; | ||
| 414 | $inCdata = true; | ||
| 415 | continue; | ||
| 416 | } | ||
| 417 | // Start and end of comment are both in the current token. Add comment and proceed with rest of the token | ||
| 418 | $newContent[$c++] = '<' . substr($tok, 0, $eocPos + 10); | ||
| 419 | $tok = substr($tok, $eocPos + 10); | ||
| 420 | $skipTag = true; | ||
| 421 | } | ||
| 422 | $firstChar = $tok[0]; | ||
| 423 | // It is a tag... (first char is a-z0-9 or /) (fixed 19/01 2004). This also avoids triggering on <?xml..> and <!DOCTYPE..> | ||
| 424 |             if (!$skipTag && preg_match('/[[:alnum:]\\/]/', $firstChar) == 1) { | ||
| 425 | $tagEnd = strpos($tok, '>'); | ||
| 426 | // If there is and end-bracket... tagEnd can't be 0 as the first character can't be a > | ||
| 427 |                 if ($tagEnd) { | ||
| 428 | $endTag = $firstChar === '/' ? 1 : 0; | ||
| 429 | $tagContent = substr($tok, $endTag, $tagEnd - $endTag); | ||
| 430 |                     $tagParts = preg_split('/\\s+/s', $tagContent, 2); | ||
| 431 | $tagName = strtolower($tagParts[0]); | ||
| 432 | $emptyTag = 0; | ||
| 433 |                     if (isset($tags[$tagName])) { | ||
| 434 | // If there is processing to do for the tag: | ||
| 435 |                         if (is_array($tags[$tagName])) { | ||
| 436 |                             if (preg_match('/^(' . self::VOID_ELEMENTS . ' )$/i', $tagName)) { | ||
| 437 | $emptyTag = 1; | ||
| 438 | } | ||
| 439 | // If NOT an endtag, do attribute processing (added dec. 2003) | ||
| 440 |                             if (!$endTag) { | ||
| 441 | // Override attributes | ||
| 442 |                                 if ((string)$tags[$tagName]['overrideAttribs'] !== '') { | ||
| 443 | $tagParts[1] = $tags[$tagName]['overrideAttribs']; | ||
| 444 | } | ||
| 445 | // Allowed tags | ||
| 446 |                                 if ((string)$tags[$tagName]['allowedAttribs'] !== '') { | ||
| 447 | // No attribs allowed | ||
| 448 |                                     if ((string)$tags[$tagName]['allowedAttribs'] === '0') { | ||
| 449 | $tagParts[1] = ''; | ||
| 450 |                                     } elseif (trim($tagParts[1])) { | ||
| 451 | $tagAttrib = $this->get_tag_attributes($tagParts[1]); | ||
| 452 | $tagParts[1] = ''; | ||
| 453 | $newTagAttrib = []; | ||
| 454 |                                         if (!($tList = $tags[$tagName]['_allowedAttribs'])) { | ||
| 455 | // Just explode attribts for tag once | ||
| 456 |                                             $tList = ($tags[$tagName]['_allowedAttribs'] = GeneralUtility::trimExplode(',', strtolower($tags[$tagName]['allowedAttribs']), true)); | ||
| 457 | } | ||
| 458 |                                         foreach ($tList as $allowTag) { | ||
| 459 |                                             if (isset($tagAttrib[0][$allowTag])) { | ||
| 460 | $newTagAttrib[$allowTag] = $tagAttrib[0][$allowTag]; | ||
| 461 | } | ||
| 462 | } | ||
| 463 | $tagParts[1] = $this->compileTagAttribs($newTagAttrib, $tagAttrib[1]); | ||
| 464 | } | ||
| 465 | } | ||
| 466 | // Fixed attrib values | ||
| 467 |                                 if (is_array($tags[$tagName]['fixAttrib'])) { | ||
| 468 | $tagAttrib = $this->get_tag_attributes($tagParts[1]); | ||
| 469 | $tagParts[1] = ''; | ||
| 470 |                                     foreach ($tags[$tagName]['fixAttrib'] as $attr => $params) { | ||
| 471 |                                         if (isset($params['set']) && $params['set'] !== '') { | ||
| 472 | $tagAttrib[0][$attr] = $params['set']; | ||
| 473 | } | ||
| 474 |                                         if (!empty($params['unset'])) { | ||
| 475 | unset($tagAttrib[0][$attr]); | ||
| 476 | } | ||
| 477 |                                         if (!isset($tagAttrib[0][$attr]) && (string)$params['default'] !== '') { | ||
| 478 | $tagAttrib[0][$attr] = $params['default']; | ||
| 479 | } | ||
| 480 |                                         if ($params['always'] || isset($tagAttrib[0][$attr])) { | ||
| 481 |                                             if ($params['trim']) { | ||
| 482 | $tagAttrib[0][$attr] = trim($tagAttrib[0][$attr]); | ||
| 483 | } | ||
| 484 |                                             if ($params['intval']) { | ||
| 485 | $tagAttrib[0][$attr] = (int)$tagAttrib[0][$attr]; | ||
| 486 | } | ||
| 487 |                                             if ($params['lower']) { | ||
| 488 | $tagAttrib[0][$attr] = strtolower($tagAttrib[0][$attr]); | ||
| 489 | } | ||
| 490 |                                             if ($params['upper']) { | ||
| 491 | $tagAttrib[0][$attr] = strtoupper($tagAttrib[0][$attr]); | ||
| 492 | } | ||
| 493 |                                             if ($params['range']) { | ||
| 494 |                                                 if (isset($params['range'][1])) { | ||
| 495 | $tagAttrib[0][$attr] = MathUtility::forceIntegerInRange($tagAttrib[0][$attr], (int)$params['range'][0], (int)$params['range'][1]); | ||
| 496 |                                                 } else { | ||
| 497 | $tagAttrib[0][$attr] = MathUtility::forceIntegerInRange($tagAttrib[0][$attr], (int)$params['range'][0]); | ||
| 498 | } | ||
| 499 | } | ||
| 500 |                                             if (is_array($params['list'])) { | ||
| 501 | // For the class attribute, remove from the attribute value any class not in the list | ||
| 502 | // Classes are case sensitive | ||
| 503 |                                                 if ($attr === 'class') { | ||
| 504 | $newClasses = []; | ||
| 505 |                                                     $classes = GeneralUtility::trimExplode(' ', $tagAttrib[0][$attr], true); | ||
| 506 |                                                     foreach ($classes as $class) { | ||
| 507 |                                                         if (in_array($class, $params['list'])) { | ||
| 508 | $newClasses[] = $class; | ||
| 509 | } | ||
| 510 | } | ||
| 511 |                                                     if (!empty($newClasses)) { | ||
| 512 |                                                         $tagAttrib[0][$attr] = implode(' ', $newClasses); | ||
| 513 |                                                     } else { | ||
| 514 | $tagAttrib[0][$attr] = $params['list'][0]; | ||
| 515 | } | ||
| 516 |                                                 } else { | ||
| 517 |                                                     if (!in_array($this->caseShift($tagAttrib[0][$attr], $params['casesensitiveComp']), $this->caseShift($params['list'], $params['casesensitiveComp'], $tagName))) { | ||
| 518 | $tagAttrib[0][$attr] = $params['list'][0]; | ||
| 519 | } | ||
| 520 | } | ||
| 521 | } | ||
| 522 |                                             if ($params['removeIfFalse'] && $params['removeIfFalse'] !== 'blank' && !$tagAttrib[0][$attr] || $params['removeIfFalse'] === 'blank' && (string)$tagAttrib[0][$attr] === '') { | ||
| 523 | unset($tagAttrib[0][$attr]); | ||
| 524 | } | ||
| 525 |                                             if ((string)$params['removeIfEquals'] !== '' && $this->caseShift($tagAttrib[0][$attr], $params['casesensitiveComp']) === $this->caseShift($params['removeIfEquals'], $params['casesensitiveComp'])) { | ||
| 526 | unset($tagAttrib[0][$attr]); | ||
| 527 | } | ||
| 528 |                                             if ($params['prefixLocalAnchors']) { | ||
| 529 |                                                 if ($tagAttrib[0][$attr][0] === '#') { | ||
| 530 |                                                     if ($params['prefixLocalAnchors'] == 2) { | ||
| 531 | /** @var ContentObjectRenderer $contentObjectRenderer */ | ||
| 532 | $contentObjectRenderer = GeneralUtility::makeInstance(ContentObjectRenderer::class); | ||
| 533 | $prefix = $contentObjectRenderer->getUrlToCurrentLocation(); | ||
| 534 |                                                     } else { | ||
| 535 |                                                         $prefix = GeneralUtility::getIndpEnv('TYPO3_REQUEST_URL'); | ||
| 536 | } | ||
| 537 | $tagAttrib[0][$attr] = $prefix . $tagAttrib[0][$attr]; | ||
| 538 | } | ||
| 539 | } | ||
| 540 |                                             if ($params['prefixRelPathWith']) { | ||
| 541 | $urlParts = parse_url($tagAttrib[0][$attr]); | ||
| 542 |                                                 if (!$urlParts['scheme'] && $urlParts['path'][0] !== '/') { | ||
| 543 | // If it is NOT an absolute URL (by http: or starting "/") | ||
| 544 | $tagAttrib[0][$attr] = $params['prefixRelPathWith'] . $tagAttrib[0][$attr]; | ||
| 545 | } | ||
| 546 | } | ||
| 547 |                                             if ($params['userFunc']) { | ||
| 548 |                                                 if (is_array($params['userFunc.'])) { | ||
| 549 | $params['userFunc.']['attributeValue'] = $tagAttrib[0][$attr]; | ||
| 550 |                                                 } else { | ||
| 551 | $params['userFunc.'] = $tagAttrib[0][$attr]; | ||
| 552 | } | ||
| 553 | $tagAttrib[0][$attr] = GeneralUtility::callUserFunction($params['userFunc'], $params['userFunc.'], $this); | ||
| 554 | } | ||
| 555 | } | ||
| 556 | } | ||
| 557 | $tagParts[1] = $this->compileTagAttribs($tagAttrib[0], $tagAttrib[1]); | ||
| 558 | } | ||
| 559 |                             } else { | ||
| 560 | // If endTag, remove any possible attributes: | ||
| 561 | $tagParts[1] = ''; | ||
| 562 | } | ||
| 563 | // Protecting the tag by converting < and > to &lt; and &gt; ?? | ||
| 564 |                             if ($tags[$tagName]['protect']) { | ||
| 565 | $lt = '<'; | ||
| 566 | $gt = '>'; | ||
| 567 |                             } else { | ||
| 568 | $lt = '<'; | ||
| 569 | $gt = '>'; | ||
| 570 | } | ||
| 571 | // Remapping tag name? | ||
| 572 |                             if ($tags[$tagName]['remap']) { | ||
| 573 | $tagParts[0] = $tags[$tagName]['remap']; | ||
| 574 | } | ||
| 575 | // rmTagIfNoAttrib | ||
| 576 |                             if ($endTag || trim($tagParts[1]) || !$tags[$tagName]['rmTagIfNoAttrib']) { | ||
| 577 | $setTag = 1; | ||
| 578 | // Remove this closing tag if $tagName was among $TSconfig['removeTags'] | ||
| 579 |                                 if ($endTag && $tags[$tagName]['allowedAttribs'] === 0 && $tags[$tagName]['rmTagIfNoAttrib'] === 1) { | ||
| 580 | $setTag = 0; | ||
| 581 | } | ||
| 582 |                                 if ($tags[$tagName]['nesting']) { | ||
| 583 |                                     if (!is_array($tagRegister[$tagName])) { | ||
| 584 | $tagRegister[$tagName] = []; | ||
| 585 | } | ||
| 586 |                                     if ($endTag) { | ||
| 587 | $correctTag = 1; | ||
| 588 |                                         if ($tags[$tagName]['nesting'] === 'global') { | ||
| 589 | $lastEl = end($tagStack); | ||
| 590 |                                             if ($tagName !== $lastEl) { | ||
| 591 |                                                 if (in_array($tagName, $tagStack)) { | ||
| 592 |                                                     while (!empty($tagStack) && $tagName !== $lastEl) { | ||
| 593 | $elPos = end($tagRegister[$lastEl]); | ||
| 594 | unset($newContent[$elPos]); | ||
| 595 | array_pop($tagRegister[$lastEl]); | ||
| 596 | array_pop($tagStack); | ||
| 597 | $lastEl = end($tagStack); | ||
| 598 | } | ||
| 599 |                                                 } else { | ||
| 600 | // In this case the | ||
| 601 | $correctTag = 0; | ||
| 602 | } | ||
| 603 | } | ||
| 604 | } | ||
| 605 |                                         if (empty($tagRegister[$tagName]) || !$correctTag) { | ||
| 606 | $setTag = 0; | ||
| 607 |                                         } else { | ||
| 608 | array_pop($tagRegister[$tagName]); | ||
| 609 |                                             if ($tags[$tagName]['nesting'] === 'global') { | ||
| 610 | array_pop($tagStack); | ||
| 611 | } | ||
| 612 | } | ||
| 613 |                                     } else { | ||
| 614 | $tagRegister[$tagName][] = $c; | ||
| 615 |                                         if ($tags[$tagName]['nesting'] === 'global') { | ||
| 616 | $tagStack[] = $tagName; | ||
| 617 | } | ||
| 618 | } | ||
| 619 | } | ||
| 620 |                                 if ($setTag) { | ||
| 621 | // Setting the tag | ||
| 622 | $newContent[$c++] = $lt . ($endTag ? '/' : '') . trim($tagParts[0] . ' ' . $tagParts[1]) . ($emptyTag ? ' /' : '') . $gt; | ||
| 623 | } | ||
| 624 | } | ||
| 625 |                         } else { | ||
| 626 | $newContent[$c++] = '<' . ($endTag ? '/' : '') . $tagContent . '>'; | ||
| 627 | } | ||
| 628 |                     } elseif ($keepAll) { | ||
| 629 | // This is if the tag was not defined in the array for processing: | ||
| 630 |                         if ($keepAll === 'protect') { | ||
| 631 | $lt = '<'; | ||
| 632 | $gt = '>'; | ||
| 633 |                         } else { | ||
| 634 | $lt = '<'; | ||
| 635 | $gt = '>'; | ||
| 636 | } | ||
| 637 | $newContent[$c++] = $lt . ($endTag ? '/' : '') . $tagContent . $gt; | ||
| 638 | } | ||
| 639 | $newContent[$c++] = $this->bidir_htmlspecialchars(substr($tok, $tagEnd + 1), $hSC); | ||
| 640 |                 } else { | ||
| 641 |                     $newContent[$c++] = $this->bidir_htmlspecialchars('<' . $tok, $hSC); | ||
| 642 | } | ||
| 643 |             } else { | ||
| 644 | $newContent[$c++] = $this->bidir_htmlspecialchars(($skipTag ? '' : '<') . $tok, $hSC); | ||
| 645 | // It was not a tag anyways | ||
| 646 | $skipTag = false; | ||
| 647 | } | ||
| 648 | } | ||
| 649 | // Unsetting tags: | ||
| 650 |         foreach ($tagRegister as $tag => $positions) { | ||
| 651 |             foreach ($positions as $pKey) { | ||
| 652 | unset($newContent[$pKey]); | ||
| 653 | } | ||
| 654 | } | ||
| 655 |         $newContent = implode('', $newContent); | ||
| 656 | $newContent = $this->stripEmptyTagsIfConfigured($newContent, $addConfig); | ||
| 657 | return $newContent; | ||
| 658 | } | ||
| 659 | |||
/**
 * Applies or reverts HTML special-character encoding, selected by $dir.
 *
 * @param string $value Input value
 * @param int $dir 1 = encode, 2 = encode but leave existing entities alone, -1 = decode; any other value returns $value unchanged
 * @return string Output value
 */
public function bidir_htmlspecialchars($value, $dir)
{
    $direction = (int)$dir;
    if ($direction === 1) {
        return htmlspecialchars($value);
    }
    if ($direction === 2) {
        // Last argument disables double encoding, so "&amp;" stays "&amp;".
        return htmlspecialchars($value, ENT_COMPAT, 'UTF-8', false);
    }
    if ($direction === -1) {
        return htmlspecialchars_decode($value);
    }
    // Unknown direction: pass the value through untouched.
    return $value;
}
| 680 | |||
/**
 * Prefixes relative resource references in $content.
 *
 * Handled tags and attributes: background (td, body, table), src (img, input, script, embed),
 * href (link, a), action (form) and value of <param name="movie">. Afterwards every url(...)
 * inside <style> blocks is prefixed as well.
 *
 * @param string $main_prefix Prefix string
 * @param string $content HTML content
 * @param array $alternatives Alternative prefixes per tag: key => value pairs where the keys are the tag names in uppercase. The <style> section is looked up with the lowercase key "style".
 * @param string $suffix Suffix string (put after the resource).
 * @return string Processed HTML content
 */
public function prefixResourcePath($main_prefix, $content, $alternatives = [], $suffix = '')
{
    // Attribute that carries the resource reference, per lowercase tag name.
    // "param" is handled separately because it depends on the name attribute.
    $resourceAttributes = [
        'td' => 'background',
        'body' => 'background',
        'table' => 'background',
        'img' => 'src',
        'input' => 'src',
        'script' => 'src',
        'embed' => 'src',
        'link' => 'href',
        'a' => 'href',
        'form' => 'action',
    ];
    $parts = $this->splitTags('embed,td,table,body,img,input,form,link,script,a,param', $content);
    foreach ($parts as $k => $v) {
        // Only odd entries are processed (the original guarded on $k % 2).
        if (!($k % 2)) {
            continue;
        }
        $params = $this->get_tag_attributes($v);
        // Detect tag-ending so that it is re-applied correctly.
        $tagEnd = substr($v, -2) === '/>' ? ' />' : '>';
        // The 'name' of the first tag
        $firstTagName = $this->getFirstTagName($v);
        $tagName = strtolower($firstTagName);
        $prefix = $alternatives[strtoupper($firstTagName)] ?? $main_prefix;
        if ($tagName === 'param') {
            // Only <param name="movie"> carries a resource reference.
            $attribute = ($params[0]['name'] ?? '') === 'movie' ? 'value' : null;
        } else {
            $attribute = $resourceAttributes[$tagName] ?? null;
        }
        // empty() keeps the original truthiness test while tolerating a missing attribute.
        if ($attribute === null || empty($params[0][$attribute])) {
            continue;
        }
        $params[0][$attribute] = $this->prefixRelPath($prefix, $params[0][$attribute], $suffix);
        $compiledAttributes = $this->compileTagAttribs($params[0], $params[1] ?? []);
        $parts[$k] = '<' . trim($tagName . ' ' . $compiledAttributes) . $tagEnd;
    }
    $content = implode('', $parts);
    // Fix <style> section:
    $prefix = $alternatives['style'] ?? $main_prefix;
    if ((string)$prefix !== '') {
        $parts = $this->splitIntoBlock('style', $content);
        foreach ($parts as $k => &$part) {
            if ($k % 2) {
                // A callback is used instead of a replacement string so that "$1" or "\1"
                // inside $prefix / $suffix is inserted literally, not as a back-reference.
                $rewritten = preg_replace_callback(
                    '/(url[[:space:]]*\\([[:space:]]*["\']?)([^"\')]*)(["\']?[[:space:]]*\\))/i',
                    static function (array $match) use ($prefix, $suffix) {
                        return $match[1] . $prefix . $match[2] . $suffix . $match[3];
                    },
                    $part
                );
                // On a PCRE failure keep the block untouched rather than dropping it.
                if ($rewritten !== null) {
                    $part = $rewritten;
                }
            }
        }
        unset($part);
        $content = implode('', $parts);
    }
    return $content;
}
| 775 | |||
/**
 * Internal sub-function for ->prefixResourcePath()
 *
 * Values starting with "/" or "#" and values that carry a URL scheme are returned unchanged;
 * everything else (including an empty string or a URL parse_url() cannot parse) is wrapped
 * in $prefix and $suffix.
 *
 * @param string $prefix Prefix string
 * @param string $srcVal Relative path/URL
 * @param string $suffix Suffix string
 * @return string Output path, prefixed if no scheme in input string
 * @access private
 */
public function prefixRelPath($prefix, $srcVal, $suffix = '')
{
    // substr() instead of $srcVal[0]: an empty value must not raise
    // an "Uninitialized string offset" warning.
    $firstChar = substr((string)$srcVal, 0, 1);
    // Only prefix if it's not an absolute URL or
    // only a link to a section within the page.
    if ($firstChar === '/' || $firstChar === '#') {
        return $srcVal;
    }
    // Asking for the scheme component directly yields null when it is absent and
    // false for unparsable input; both mean "no scheme", without touching an array key.
    $scheme = parse_url((string)$srcVal, PHP_URL_SCHEME);
    if (!$scheme) {
        return $prefix . $srcVal . $suffix;
    }
    return $srcVal;
}
| 798 | |||
/**
 * Internal function for case shifting of a string or whole array
 *
 * @param mixed $str Input string, or array whose values are shifted (keys are kept)
 * @param bool $caseSensitiveComparison If TRUE the input is returned untouched, if FALSE it is returned in uppercase
 * @param string $cacheKey Key for the per-instance result cache; only consulted for array input. An already cached entry wins over the passed array.
 * @return mixed Upper-cased string or array, or the unmodified input
 * @access private
 */
public function caseShift($str, $caseSensitiveComparison, $cacheKey = '')
{
    // Case-sensitive comparison needs the value exactly as given.
    if ($caseSensitiveComparison) {
        return $str;
    }
    if (!is_array($str)) {
        return strtoupper($str);
    }
    // Fetch from runlevel cache
    if ($cacheKey && isset($this->caseShift_cache[$cacheKey])) {
        return $this->caseShift_cache[$cacheKey];
    }
    $upperCased = [];
    foreach ($str as $index => $item) {
        $upperCased[$index] = strtoupper($item);
    }
    if ($cacheKey) {
        $this->caseShift_cache[$cacheKey] = $upperCased;
    }
    return $upperCased;
}
| 830 | |||
/**
 * Compiling an array with tag attributes into a string
 *
 * An attribute is written as a bare name when its value is an empty string and no
 * "dashType" entry is set in $meta for it. Otherwise it is written as name=value, quoted
 * with the recorded "dashType" or, if none is recorded, with double quotes unless the
 * value can be interpreted as an integer. Values are inserted as given (no escaping here).
 *
 * @param array $tagAttrib Tag attributes
 * @param array $meta Meta information about these attributes (like if they were quoted); entries may be missing
 * @return string Imploded attributes, eg: 'attribute="value" attrib2="value2"'
 * @access private
 */
public function compileTagAttribs($tagAttrib, $meta = [])
{
    $accu = [];
    foreach ($tagAttrib as $k => $v) {
        // $meta defaults to [] and need not cover every attribute, so resolve it once
        // instead of reading possibly undefined keys.
        $attrMeta = isset($meta[$k]) && is_array($meta[$k]) ? $meta[$k] : [];
        $attr = !empty($attrMeta['origTag']) ? $attrMeta['origTag'] : $k;
        if ((string)$v !== '' || isset($attrMeta['dashType'])) {
            if (!empty($attrMeta['dashType'])) {
                $dash = $attrMeta['dashType'];
            } else {
                $dash = MathUtility::canBeInterpretedAsInteger($v) ? '' : '"';
            }
            $attr .= '=' . $dash . $v . $dash;
        }
        $accu[] = $attr;
    }
    return implode(' ', $accu);
}
| 852 | |||
/**
 * Converts TSconfig into an array for the HTMLcleaner function.
 *
 * All $TSconfig keys are optional. Evaluated keys: allowTags, tags., localNesting,
 * globalNesting, rmTagIfNoAttrib, noAttrib, removeTags, stripEmptyTags, stripEmptyTags.,
 * keepNonMatchedTags and htmlSpecialChars.
 *
 * @param array $TSconfig TSconfig for HTMLcleaner
 * @param array $keepTags Base tag configuration (tag name => config); entries override those derived from "allowTags"
 * @return array Numeric array: [0] tag configuration, [1] keepNonMatchedTags as string, [2] htmlSpecialChars as int, [3] additional configuration (stripEmptyTags settings)
 * @access private
 */
public function HTMLparserConfig($TSconfig, $keepTags = [])
{
    // Allow tags (base list, merged with incoming array)
    $allowTags = strtolower((string)($TSconfig['allowTags'] ?? ''));
    $alTags = array_flip(GeneralUtility::trimExplode(',', $allowTags, true));
    $keepTags = array_merge($alTags, $keepTags);
    // Set config properties.
    $tagsConfig = $TSconfig['tags.'] ?? null;
    if (is_array($tagsConfig)) {
        // First pass - scalar entries: "0" drops a tag, "1" adds it unless already present.
        // The loose == is kept on purpose so integer keys still compare equal to their
        // lower-cased string form.
        foreach ($tagsConfig as $key => $tagC) {
            if (!is_array($tagC) && $key == strtolower((string)$key)) {
                if ((string)$tagC === '0') {
                    unset($keepTags[$key]);
                }
                if ((string)$tagC === '1' && !isset($keepTags[$key])) {
                    $keepTags[$key] = 1;
                }
            }
        }
        // Second pass - array entries ("tagname."): per-tag properties.
        foreach ($tagsConfig as $key => $tagC) {
            if (!is_array($tagC) || $key != strtolower((string)$key)) {
                continue;
            }
            // Strip the last character of the key (the trailing dot of "tagname.").
            $key = substr((string)$key, 0, -1);
            if (!isset($keepTags[$key]) || !is_array($keepTags[$key])) {
                $keepTags[$key] = [];
            }
            if (isset($tagC['fixAttrib.']) && is_array($tagC['fixAttrib.'])) {
                foreach ($tagC['fixAttrib.'] as $atName => $atConfig) {
                    if (!is_array($atConfig)) {
                        continue;
                    }
                    $atName = substr((string)$atName, 0, -1);
                    $fixAttrib = $keepTags[$key]['fixAttrib'][$atName] ?? [];
                    if (!is_array($fixAttrib)) {
                        $fixAttrib = [];
                    }
                    $fixAttrib = array_merge($fixAttrib, $atConfig);
                    // "range" and "list" arrive as comma lists; values that are already
                    // arrays (from a pre-built $keepTags) are left as they are.
                    foreach (['range', 'list'] as $listProperty) {
                        if (
                            isset($fixAttrib[$listProperty])
                            && !is_array($fixAttrib[$listProperty])
                            && (string)$fixAttrib[$listProperty] !== ''
                        ) {
                            $fixAttrib[$listProperty] = GeneralUtility::trimExplode(',', (string)$fixAttrib[$listProperty]);
                        }
                    }
                    $keepTags[$key]['fixAttrib'][$atName] = $fixAttrib;
                }
            }
            unset($tagC['fixAttrib.'], $tagC['fixAttrib']);
            // rmTagIfNoAttrib is always combined with nesting; default it to 1 when unset.
            if (!empty($tagC['rmTagIfNoAttrib']) && empty($tagC['nesting'])) {
                $tagC['nesting'] = 1;
            }
            $keepTags[$key] = array_merge($keepTags[$key], $tagC);
        }
    }
    // Comma-list options that only touch tags already present in $keepTags.
    // The order matters: later options see what earlier ones have set.
    foreach (['localNesting', 'globalNesting', 'rmTagIfNoAttrib', 'noAttrib'] as $option) {
        if (empty($TSconfig[$option])) {
            continue;
        }
        $tagNames = GeneralUtility::trimExplode(',', strtolower((string)$TSconfig[$option]), true);
        foreach ($tagNames as $tn) {
            if (!isset($keepTags[$tn])) {
                continue;
            }
            if (!is_array($keepTags[$tn])) {
                $keepTags[$tn] = [];
            }
            if ($option === 'localNesting') {
                $keepTags[$tn]['nesting'] = 1;
            } elseif ($option === 'globalNesting') {
                $keepTags[$tn]['nesting'] = 'global';
            } elseif ($option === 'rmTagIfNoAttrib') {
                $keepTags[$tn]['rmTagIfNoAttrib'] = 1;
                if (empty($keepTags[$tn]['nesting'])) {
                    $keepTags[$tn]['nesting'] = 1;
                }
            } else {
                // noAttrib
                $keepTags[$tn]['allowedAttribs'] = 0;
            }
        }
    }
    // removeTags replaces any existing configuration of the listed tags,
    // whether or not they were present before.
    if (!empty($TSconfig['removeTags'])) {
        $tagNames = GeneralUtility::trimExplode(',', strtolower((string)$TSconfig['removeTags']), true);
        foreach ($tagNames as $tn) {
            $keepTags[$tn] = [
                'allowedAttribs' => 0,
                'rmTagIfNoAttrib' => 1,
            ];
        }
    }
    // Create additional configuration:
    $addConfig = [];
    if (isset($TSconfig['stripEmptyTags'])) {
        $addConfig['stripEmptyTags'] = $TSconfig['stripEmptyTags'];
        if (isset($TSconfig['stripEmptyTags.'])) {
            $addConfig['stripEmptyTags.'] = $TSconfig['stripEmptyTags.'];
        }
    }
    return [
        $keepTags,
        (string)($TSconfig['keepNonMatchedTags'] ?? ''),
        (int)($TSconfig['htmlSpecialChars'] ?? 0),
        $addConfig
    ];
}
| 981 | |||
/**
 * Strips empty tags from HTML.
 *
 * A tag counts as empty when there is nothing but spaces (and optionally &nbsp; entities)
 * between its opening and closing tag. Stripping is repeated until nothing more is removed,
 * so tags that become empty through an earlier removal are stripped as well.
 *
 * @param string $content The content to be stripped of empty tags
 * @param string $tagList The comma separated list of tags to be stripped.
 *                        If empty, all empty tags will be stripped
 * @param bool $treatNonBreakingSpaceAsEmpty If TRUE tags containing only &nbsp; entities will be treated as empty.
 * @param bool $keepTags If true, the provided tags will be kept instead of stripped.
 * @return string the stripped content
 */
public function stripEmptyTags($content, $tagList = '', $treatNonBreakingSpaceAsEmpty = false, $keepTags = false)
{
    if (!empty($tagList)) {
        // The tag names are inserted into the pattern as given (no quoting).
        $tagRegEx = implode('|', GeneralUtility::trimExplode(',', $tagList, true));
        if ($keepTags) {
            $tagRegEx = '(?!' . $tagRegEx . ')[^ >]+';
        }
    } else {
        $tagRegEx = '[^ >]+'; // all characters until you reach a > or space;
    }
    // The entity has to be spelled out literally; a plain space here would make the flag a no-op.
    $nbspRegex = $treatNonBreakingSpaceAsEmpty ? '|(&nbsp;)' : '';
    $finalRegex = sprintf('/<(%s)[^>]*>( %s)*<\/\\1[^>]*>/i', $tagRegEx, $nbspRegex);
    do {
        $stripped = preg_replace($finalRegex, '', $content, -1, $count);
        if ($stripped === null) {
            // PCRE failure: keep what has been processed so far instead of returning null.
            break;
        }
        $content = $stripped;
    } while ($count !== 0);
    return $content;
}
| 1010 | |||
| 1011 | /** | ||
| 1012 | * Strips the configured empty tags from the HTML code. | ||
| 1013 | * | ||
| 1014 | * @param string $value | ||
| 1015 | * @param array $configuration | ||
| 1016 | * @return string | ||
| 1017 | */ | ||
| 1018 | protected function stripEmptyTagsIfConfigured($value, $configuration) | ||
| 1036 | } | ||
| 1037 | } | ||
| 1038 |