| Total Complexity | 120 |
| Total Lines | 839 |
| Duplicated Lines | 0 % |
| Changes | 0 | ||
Complex classes like RteHtmlParser often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use RteHtmlParser, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 33 | class RteHtmlParser extends HtmlParser implements LoggerAwareInterface |
||
| 34 | { |
||
| 35 | use LoggerAwareTrait; |
||
| 36 | |||
| 37 | /** |
||
| 38 | * List of elements that are not wrapped into a "p" tag while doing the transformation. |
||
| 39 | * @var string |
||
| 40 | */ |
||
| 41 | protected $blockElementList = 'DIV,TABLE,BLOCKQUOTE,PRE,UL,OL,H1,H2,H3,H4,H5,H6,ADDRESS,DL,DD,HEADER,SECTION,FOOTER,NAV,ARTICLE,ASIDE'; |
||
| 42 | |||
| 43 | /** |
||
| 44 | * List of all tags that are allowed by default |
||
| 45 | * @var string |
||
| 46 | */ |
||
| 47 | protected $defaultAllowedTagsList = 'b,i,u,a,img,br,div,center,pre,font,hr,sub,sup,p,strong,em,li,ul,ol,blockquote,strike,span,abbr,acronym,dfn'; |
||
| 48 | |||
| 49 | /** |
||
| 50 | * Set to the TSconfig options coming from Page TSconfig |
||
| 51 | * |
||
| 52 | * @var array |
||
| 53 | */ |
||
| 54 | protected $procOptions = []; |
||
| 55 | |||
| 56 | /** |
||
| 57 | * Run-away brake for recursive calls. |
||
| 58 | * |
||
| 59 | * @var int |
||
| 60 | */ |
||
| 61 | protected $TS_transform_db_safecounter = 100; |
||
| 62 | |||
| 63 | /** |
||
| 64 | * Data caching for processing function |
||
| 65 | * |
||
| 66 | * @var array |
||
| 67 | */ |
||
| 68 | protected $getKeepTags_cache = []; |
||
| 69 | |||
| 70 | /** |
||
| 71 | * Storage of the allowed CSS class names in the RTE |
||
| 72 | * |
||
| 73 | * @var array |
||
| 74 | */ |
||
| 75 | protected $allowedClasses = []; |
||
| 76 | |||
| 77 | /** |
||
| 78 | * A list of HTML attributes for <p> tags. Because <p> tags are wrapped currently in a special handling, |
||
| 79 | * they have a special place for configuration via 'proc.allowAttributes.' |
||
| 80 | * |
||
| 81 | * @var array |
||
| 82 | */ |
||
| 83 | protected $allowedAttributesForParagraphTags = [ |
||
| 84 | 'class', |
||
| 85 | 'align', |
||
| 86 | 'id', |
||
| 87 | 'title', |
||
| 88 | 'dir', |
||
| 89 | 'lang', |
||
| 90 | 'xml:lang', |
||
| 91 | 'itemscope', |
||
| 92 | 'itemtype', |
||
| 93 | 'itemprop' |
||
| 94 | ]; |
||
| 95 | |||
| 96 | /** |
||
| 97 | * Any tags that are allowed outside of <p> sections - usually similar to the block elements |
||
| 98 | * plus some special tags like <hr> and <img> (if images are allowed). |
||
| 99 | * Completely overrideable via 'proc.allowTagsOutside' |
||
| 100 | * |
||
| 101 | * @var array |
||
| 102 | */ |
||
| 103 | protected $allowedTagsOutsideOfParagraphs = [ |
||
| 104 | 'address', |
||
| 105 | 'article', |
||
| 106 | 'aside', |
||
| 107 | 'blockquote', |
||
| 108 | 'div', |
||
| 109 | 'footer', |
||
| 110 | 'header', |
||
| 111 | 'hr', |
||
| 112 | 'nav', |
||
| 113 | 'section' |
||
| 114 | ]; |
||
| 115 | |||
| 116 | /** |
||
| 117 | * @var EventDispatcherInterface |
||
| 118 | */ |
||
| 119 | protected $eventDispatcher; |
||
| 120 | |||
public function __construct(EventDispatcherInterface $eventDispatcher)
{
    // The dispatcher is kept on the instance; markBrokenLinks() uses it to
    // dispatch a BrokenLinkAnalysisEvent for every link it inspects.
    $this->eventDispatcher = $eventDispatcher;
}
||
| 125 | |||
| 126 | /** |
||
| 127 | * Initialize, setting element reference and record PID |
||
| 128 | * |
||
| 129 | * @param string $elRef Element reference, e.g. "tt_content:bodytext" |
||
| 130 | * @param int $recPid PID of the record (page id) |
||
| 131 | * @deprecated will be removed in TYPO3 v11.0, as it serves no purpose anymore |
||
| 132 | */ |
||
public function init($elRef = '', $recPid = 0)
{
    // Both arguments are intentionally ignored: the method only exists to
    // raise the deprecation notice for callers that still invoke it.
    $deprecationNotice = 'RteHtmlParser->init() is not needed anymore for RTE transformation, and will be removed in TYPO3 v11.0.';
    trigger_error($deprecationNotice, E_USER_DEPRECATED);
}
||
| 137 | |||
| 138 | /** |
||
| 139 | * Sanitize and streamline given options (usually from RichTextConfiguration results "proc.") |
||
| 140 | * and set them to the respective properties. |
||
| 141 | * |
||
| 142 | * @param array $processingConfiguration |
||
| 143 | */ |
||
protected function setProcessingConfiguration(array $processingConfiguration): void
{
    $this->procOptions = $processingConfiguration;

    // getKeepTags() caches its result per direction, and that result is derived from
    // procOptions. Drop the cache so an instance that is reused with a different
    // configuration does not keep serving the tag rules of the previous one.
    $this->getKeepTags_cache = [];

    // Allowed CSS classes: the array notation ("allowedClasses.") takes precedence,
    // otherwise the comma separated list is split and trimmed.
    if (isset($this->procOptions['allowedClasses.'])) {
        $this->allowedClasses = (array)$this->procOptions['allowedClasses.'];
    } else {
        $this->allowedClasses = GeneralUtility::trimExplode(',', $this->procOptions['allowedClasses'] ?? '', true);
    }

    // Dynamic configuration of blockElementList (an empty value keeps the current list)
    if (!empty($this->procOptions['blockElementList'])) {
        $this->blockElementList = $this->procOptions['blockElementList'];
    }

    // Define which attributes are allowed on <p> tags. Cast to array like the other
    // list options: the value is later passed to array_flip(), which requires an array.
    if (isset($this->procOptions['allowAttributes.'])) {
        $this->allowedAttributesForParagraphTags = (array)$this->procOptions['allowAttributes.'];
    }

    // Override tags which are allowed outside of <p> tags.
    // Note: the array notation ("allowTagsOutside.") is only evaluated when the plain
    // "allowTagsOutside" option is set as well.
    if (isset($this->procOptions['allowTagsOutside'])) {
        if (isset($this->procOptions['allowTagsOutside.'])) {
            $this->allowedTagsOutsideOfParagraphs = (array)$this->procOptions['allowTagsOutside.'];
        } else {
            $this->allowedTagsOutsideOfParagraphs = GeneralUtility::trimExplode(',', strtolower($this->procOptions['allowTagsOutside']), true);
        }
    }
}
||
| 171 | |||
| 172 | /** |
||
| 173 | * Main entry point for transforming RTE content in the database so the Rich Text Editor can deal with |
||
| 174 | * e.g. links. |
||
| 175 | * |
||
| 176 | * @param string $value |
||
| 177 | * @param array $processingConfiguration |
||
| 178 | * @return string |
||
| 179 | */ |
||
public function transformTextForRichTextEditor(string $value, array $processingConfiguration): string
{
    $this->setProcessingConfiguration($processingConfiguration);
    $transformationModes = $this->resolveAppliedTransformationModes('rte');
    $value = $this->streamlineLineBreaksForProcessing($value);
    // Optional entry HTML cleaner, applied before any transformation mode runs
    $value = $this->runHtmlParserIfConfigured($value, 'entryHTMLparser_rte');

    foreach ($transformationModes as $mode) {
        // A user defined transformation registered for this mode replaces the built-in one
        $customTransformationClass = $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['transformation'][$mode] ?? null;
        if (!empty($customTransformationClass)) {
            $customTransformation = GeneralUtility::makeInstance($customTransformationClass);
            $customTransformation->pObj = $this;
            $value = $customTransformation->transform_rte($value, $this);
            continue;
        }

        // Built-in transformations; unknown modes are ignored
        switch ($mode) {
            case 'detectbrokenlinks':
                $value = $this->markBrokenLinks($value);
                break;
            case 'css_transform':
                $value = $this->TS_transform_rte($value);
                break;
            default:
                // Do nothing
        }
    }

    // Optional exit HTML cleaner, followed by the final line break clean up
    $value = $this->runHtmlParserIfConfigured($value, 'exitHTMLparser_rte');
    return $this->streamlineLineBreaksAfterProcessing($value);
}
||
| 214 | |||
| 215 | /** |
||
| 216 | * Called to process HTML content before it is stored in the database. |
||
| 217 | * |
||
| 218 | * @param string $value |
||
| 219 | * @param array $processingConfiguration |
||
| 220 | * @return string |
||
| 221 | */ |
||
| 222 | public function transformTextForPersistence(string $value, array $processingConfiguration): string |
||
| 263 | } |
||
| 264 | |||
| 265 | /********************************************** |
||
| 266 | * |
||
| 267 | * Main function |
||
| 268 | * |
||
| 269 | **********************************************/ |
||
| 270 | /** |
||
| 271 | * Transform value for RTE based on specConf in the direction specified by $direction (rte/db) |
||
| 272 | * This is the main function called from DataHandler and transfer data classes, but has been superseded |
||
| 273 | * by the methods |
||
| 274 | * - transformTextForRichTextEditor() |
||
| 275 | * - transformTextForPersistence() |
||
| 276 | * to avoid the $direction argument. |
||
| 277 | * Please use the new methods for TYPO3 v10+ - only code |
||
| 278 | * |
||
| 279 | * @param string $value Input value |
||
| 280 | * @param null $_ unused |
||
| 281 | * @param string $direction Direction of the transformation. Two keywords are allowed; "db" or "rte". If "db" it means the transformation will clean up content coming from the Rich Text Editor and goes into the database. The other direction, "rte", is of course when content is coming from database and must be transformed to fit the RTE. |
||
| 282 | * @param array $thisConfig Parsed TypoScript content configuring the RTE, probably coming from Page TSconfig. |
||
| 283 | * @return string Output value |
||
| 284 | * @deprecated will be removed in TYPO3 v11.0, use the transformText* methods instead. |
||
| 285 | */ |
||
public function RTE_transform($value, $_ = null, $direction = 'rte', $thisConfig = [])
{
    trigger_error('RteHtmlParser->RTE_transform() will be removed in TYPO3 v11.0. Use the transformTextFor* methods in the same class instead', E_USER_DEPRECATED);

    // Any direction other than "rte" or "db" hands the value back untouched
    if ($direction !== 'rte' && $direction !== 'db') {
        return $value;
    }

    // Only the "proc." part of the RTE configuration is relevant for the new methods
    $processingConfiguration = $thisConfig['proc.'] ?? [];
    return $direction === 'rte'
        ? $this->transformTextForRichTextEditor($value, $processingConfiguration)
        : $this->transformTextForPersistence($value, $processingConfiguration);
}
||
| 297 | |||
| 298 | /** |
||
| 299 | * Determines which transformation modes should be executed, and ensures that each of them is executed only once. |
||
| 300 | * |
||
| 301 | * @param string $direction |
||
| 302 | * @return array the resolved transformation modes |
||
| 303 | */ |
||
| 304 | protected function resolveAppliedTransformationModes(string $direction): array |
||
| 326 | } |
||
| 327 | |||
| 328 | /** |
||
| 329 | * Runs the HTML parser if it is configured |
||
| 330 | * Getting additional HTML cleaner configuration. These are applied either before or after the main transformation |
||
| 331 | * is done and thus totally independent processing options you can set up. |
||
| 332 | * |
||
| 333 | * This is only possible via TSconfig (procOptions) currently. |
||
| 334 | * |
||
| 335 | * @param string $content |
||
| 336 | * @param string $configurationDirective used to look up in the procOptions if enabled, and then fetch the |
||
| 337 | * @return string the processed content |
||
| 338 | */ |
||
| 339 | protected function runHtmlParserIfConfigured($content, $configurationDirective) |
||
| 346 | } |
||
| 347 | |||
| 348 | /************************************ |
||
| 349 | * |
||
| 350 | * Specific RTE TRANSFORMATION functions |
||
| 351 | * |
||
| 352 | *************************************/ |
||
| 353 | |||
| 354 | /** |
||
| 355 | * Transformation handler: 'ts_links' / direction: "db" |
||
| 356 | * Processing anchor tags, and resolves them correctly again via the LinkService syntax |
||
| 357 | * |
||
| 358 | * Splits content into <a> tag blocks and processes each tag, and allows hooks to actually render |
||
| 359 | * the result. |
||
| 360 | * |
||
| 361 | * @param string $value Content input |
||
| 362 | * @return string Content output |
||
| 363 | */ |
||
protected function TS_links_db($value)
{
    $blockSplit = $this->splitIntoBlock('A', $value);
    // One service instance is sufficient for all links of this content chunk
    $linkService = GeneralUtility::makeInstance(LinkService::class);
    foreach ($blockSplit as $k => $v) {
        // Even positions hold the content between <a> blocks and are left untouched
        if ($k % 2 === 0) {
            continue;
        }
        [$tagAttributes] = $this->get_tag_attributes($this->getFirstTag($v), true);

        // Anchors would not have an href attribute; such blocks are kept as they are
        if (!isset($tagAttributes['href'])) {
            continue;
        }

        // Reset per link: if resolve() throws, the catch block must not read the
        // link information that was resolved for a previous <a> tag.
        $linkInformation = [];
        // Store the link as <a> tag as default by TYPO3, with the link service syntax
        try {
            $linkInformation = $linkService->resolve($tagAttributes['href']);
            $tagAttributes['href'] = $linkService->asString($linkInformation);
        } catch (UnknownLinkHandlerException $e) {
            // Fall back to an href from the resolved information of THIS link (if any),
            // otherwise keep the href exactly as it was written in the tag.
            $tagAttributes['href'] = $linkInformation['href'] ?? $tagAttributes['href'];
        }

        // Rebuild the tag and process nested <a> blocks recursively
        $blockSplit[$k] = '<a ' . GeneralUtility::implodeAttributes($tagAttributes, true) . '>'
            . $this->TS_links_db($this->removeFirstAndLastTag($blockSplit[$k])) . '</a>';
    }
    return implode('', $blockSplit);
}
||
| 390 | |||
| 391 | /** |
||
| 392 | * Transformation handler: 'css_transform' / direction: "db" |
||
| 393 | * Cleaning (->db) for standard content elements (ts) |
||
| 394 | * |
||
| 395 | * @param string $value Content input |
||
| 396 | * @return string Content output |
||
| 397 | * @see TS_transform_rte() |
||
| 398 | */ |
||
protected function TS_transform_db($value)
{
    // Run-away brake: every nesting level consumes one unit of the counter
    // and gives it back when the level is left again.
    $this->TS_transform_db_safecounter--;
    if ($this->TS_transform_db_safecounter < 0) {
        // Give the unit back before bailing out. Without this, every hit of the brake
        // would permanently lower the recursion budget of this parser instance.
        $this->TS_transform_db_safecounter++;
        return $value;
    }
    // Split the content from RTE by the occurrence of these blocks:
    $blockSplit = $this->splitIntoBlock($this->blockElementList, $value);

    // Avoid superfluous linebreaks: drop trailing parts that contain only whitespace
    while (count($blockSplit) > 0 && trim(end($blockSplit)) === '') {
        array_pop($blockSplit);
    }

    // Traverse the blocks: odd positions are block elements, even positions the content in between
    foreach ($blockSplit as $k => $v) {
        if ($k % 2) {
            // Inside block:
            $tag = $this->getFirstTag($v);
            $tagName = strtolower($this->getFirstTagName($v));
            switch ($tagName) {
                case 'blockquote':
                case 'dd':
                case 'div':
                case 'header':
                case 'section':
                case 'footer':
                case 'nav':
                case 'article':
                case 'aside':
                    // Container elements: transform their inner content recursively
                    $blockSplit[$k] = $tag . $this->TS_transform_db($this->removeFirstAndLastTag($blockSplit[$k])) . '</' . $tagName . '>';
                    break;
                case 'pre':
                    // Preformatted content is kept untouched
                    break;
                default:
                    // usually <hx> tags and <table> tags where no other block elements are within the tags
                    // Eliminate true linebreaks inside block element tags
                    $blockSplit[$k] = preg_replace('/[' . LF . ']+/', ' ', $blockSplit[$k]);
            }
        } else {
            // NON-block:
            if (trim($blockSplit[$k]) !== '') {
                $blockSplit[$k] = str_replace('<hr/>', '<hr />', $blockSplit[$k]);
                // Remove linebreaks preceding hr tags
                $blockSplit[$k] = preg_replace('/[' . LF . ']+<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>/', '<$1$2/>', $blockSplit[$k]);
                // Remove linebreaks following hr tags
                $blockSplit[$k] = preg_replace('/<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>[' . LF . ']+/', '<$1$2/>', $blockSplit[$k]);
                // Replace other linebreaks with space
                $blockSplit[$k] = preg_replace('/[' . LF . ']+/', ' ', $blockSplit[$k]);
                $blockSplit[$k] = $this->divideIntoLines($blockSplit[$k]);
            } else {
                // Whitespace-only content between blocks is dropped
                unset($blockSplit[$k]);
            }
        }
    }
    $this->TS_transform_db_safecounter++;
    return implode(LF, $blockSplit);
}
||
| 460 | |||
| 461 | /** |
||
| 462 | * Transformation handler: css_transform / direction: "rte" |
||
| 463 | * Set (->rte) for standard content elements (ts) |
||
| 464 | * |
||
| 465 | * @param string $value Content input |
||
| 466 | * @return string Content output |
||
| 467 | * @see TS_transform_db() |
||
| 468 | */ |
||
protected function TS_transform_rte($value)
{
    // Split the content from database by the occurrence of the block elements
    $blockSplit = $this->splitIntoBlock($this->blockElementList, $value);
    // Traverse the blocks: odd positions are block elements, even positions the content in between
    foreach ($blockSplit as $k => $v) {
        if ($k % 2) {
            // Inside one of the blocks:
            $tag = $this->getFirstTag($v);
            $tagName = strtolower($this->getFirstTagName($v));
            // Container elements get their inner content transformed recursively,
            // all other block elements are kept as they are
            switch ($tagName) {
                case 'blockquote':
                case 'dd':
                case 'div':
                case 'header':
                case 'section':
                case 'footer':
                case 'nav':
                case 'article':
                case 'aside':
                    $blockSplit[$k] = $tag . $this->TS_transform_rte($this->removeFirstAndLastTag($blockSplit[$k])) . '</' . $tagName . '>';
                    break;
            }
            // Strip the linebreak directly following the block. The "?? ''" guards the case
            // where the block is the last part: the result is the same empty string as before,
            // but without an undefined-offset warning and without passing null to preg_replace().
            $blockSplit[$k + 1] = preg_replace('/^[ ]*' . LF . '/', '', $blockSplit[$k + 1] ?? '');
        } else {
            // NON-block:
            $nextFTN = $this->getFirstTagName($blockSplit[$k + 1] ?? '');
            $onlyLineBreaks = (preg_match('/^[ ]*' . LF . '+[ ]*$/', $blockSplit[$k]) === 1);
            // If the line is followed by a block or is the last line:
            if (GeneralUtility::inList($this->blockElementList, $nextFTN) || !isset($blockSplit[$k + 1])) {
                if (!$onlyLineBreaks) {
                    // The line contains more than just linebreaks: reduce the number of trailing linebreaks by 1
                    $blockSplit[$k] = preg_replace('/(' . LF . '*)' . LF . '[ ]*$/', '$1', $blockSplit[$k]);
                } else {
                    // The line contains only linebreaks: remove the leading linebreak
                    $blockSplit[$k] = preg_replace('/^[ ]*' . LF . '/', '', $blockSplit[$k]);
                }
            }
            // If $blockSplit[$k] is blank then unset the line, unless the line only contained linebreaks
            if ((string)$blockSplit[$k] === '' && !$onlyLineBreaks) {
                unset($blockSplit[$k]);
            } else {
                $blockSplit[$k] = $this->setDivTags($blockSplit[$k]);
            }
        }
    }
    return implode(LF, $blockSplit);
}
||
| 519 | |||
| 520 | /*************************************************************** |
||
| 521 | * |
||
| 522 | * Generic RTE transformation, analysis and helper functions |
||
| 523 | * |
||
| 524 | **************************************************************/ |
||
| 525 | |||
| 526 | /** |
||
| 527 | * Function for cleaning content going into the database. |
||
| 528 | * Content is cleaned eg. by removing unallowed HTML and ds-HSC content |
||
| 529 | * It is basically calling HTMLcleaner from the parent class with some preset configuration specifically set up for cleaning content going from the RTE into the db |
||
| 530 | * |
||
| 531 | * @param string $content Content to clean up |
||
| 532 | * @return string Clean content |
||
| 533 | * @see getKeepTags() |
||
| 534 | */ |
||
protected function HTMLcleaner_db($content)
{
    // Run the parent HTMLcleaner with the tag configuration built for the "db" direction
    return $this->HTMLcleaner($content, $this->getKeepTags('db'), false);
}
||
| 540 | |||
| 541 | /** |
||
| 542 | * Creates an array of configuration for the HTMLcleaner function based on whether content |
||
| 543 | * go TO or FROM the Rich Text Editor ($direction) |
||
| 544 | * |
||
| 545 | * @param string $direction The direction of the content being processed by the output configuration; "db" (content going into the database FROM the rte) or "rte" (content going into the form) |
||
| 546 | * @return array Configuration array |
||
| 547 | * @see HTMLcleaner_db() |
||
| 548 | */ |
||
protected function getKeepTags($direction = 'rte')
{
    // Serve from the in-memory cache when this direction was already resolved
    if (isset($this->getKeepTags_cache[$direction]) && is_array($this->getKeepTags_cache[$direction])) {
        return $this->getKeepTags_cache[$direction];
    }

    // Configured additional tags: the array notation ("allowTags.") wins over the comma list
    if (isset($this->procOptions['allowTags.']) && is_array($this->procOptions['allowTags.'])) {
        $keepTags = implode(',', $this->procOptions['allowTags.']);
    } else {
        $keepTags = $this->procOptions['allowTags'] ?? '';
    }
    // Default tags plus configured tags, lower-cased, as array keys
    $keepTags = array_flip(GeneralUtility::trimExplode(',', $this->defaultAllowedTagsList . ',' . strtolower($keepTags), true));

    // For tags to deny, remove them from $keepTags array.
    // The keys of $keepTags are lower case, so the deny list is lower-cased as well;
    // otherwise an entry such as "SPAN" would silently be ignored.
    $denyTags = GeneralUtility::trimExplode(',', strtolower((string)($this->procOptions['denyTags'] ?? '')), true);
    foreach ($denyTags as $deniedTag) {
        unset($keepTags[$deniedTag]);
    }

    // Based on the direction of content, set further options:
    switch ($direction) {
        case 'rte':
            // Transforming keepTags array so it can be understood by the HTMLcleaner function.
            // This basically converts the format of the array from TypoScript (having dots) to plain multi-dimensional array.
            [$keepTags] = $this->HTMLparserConfig($this->procOptions['HTMLparser_rte.'] ?? [], $keepTags);
            break;
        case 'db':
            // Setting up span tags if they are allowed:
            if (isset($keepTags['span'])) {
                $keepTags['span'] = [
                    'allowedAttribs' => 'id,class,style,title,lang,xml:lang,dir,itemscope,itemtype,itemprop',
                    'fixAttrib' => [
                        'class' => [
                            'removeIfFalse' => 1
                        ]
                    ],
                    'rmTagIfNoAttrib' => 1
                ];
                if (!empty($this->allowedClasses)) {
                    $keepTags['span']['fixAttrib']['class']['list'] = $this->allowedClasses;
                }
            }
            // Setting further options, getting them from the processing options;
            // defaults are applied when the respective option is empty
            $TSc = $this->procOptions['HTMLparser_db.'] ?? [];
            if (empty($TSc['globalNesting'])) {
                $TSc['globalNesting'] = 'b,i,u,a,center,font,sub,sup,strong,em,strike,span';
            }
            if (empty($TSc['noAttrib'])) {
                $TSc['noAttrib'] = 'b,i,u,br,center,hr,sub,sup,strong,em,li,ul,ol,blockquote,strike';
            }
            // Transforming the array from TypoScript to regular array:
            [$keepTags] = $this->HTMLparserConfig($TSc, $keepTags);
            break;
    }

    // Caching (internally, in object memory) the result
    $this->getKeepTags_cache[$direction] = $keepTags;
    return $this->getKeepTags_cache[$direction];
}
||
| 607 | |||
| 608 | /** |
||
| 609 | * This resolves the $value into parts based on <p>-sections. These are returned as lines separated by LF. |
||
| 610 | * This point is to resolve the HTML-code returned from RTE into ordinary lines so it's 'human-readable' |
||
| 611 | * The function ->setDivTags does the opposite. |
||
| 612 | * This function processes content to go into the database. |
||
| 613 | * |
||
| 614 | * @param string $value Value to process. |
||
| 615 | * @param int $count Recursion brake. Decremented on each recursion down to zero. Default is 5 (which equals the allowed nesting levels of p tags). |
||
| 616 | * @param bool $returnArray If TRUE, an array with the lines is returned, otherwise a string of the processed input value. |
||
| 617 | * @return string|array Processed input value. |
||
| 618 | * @see setDivTags() |
||
| 619 | */ |
||
protected function divideIntoLines($value, $count = 5, $returnArray = false)
{
    // Setting the third param will eliminate false end-tags. Maybe this is a good thing to do...?
    $paragraphBlocks = $this->splitIntoBlock('p', $value, true);
    // Returns plainly the content if there were no p sections in it, or if the recursion brake is used up.
    // Note: in this case a string is returned even if $returnArray is set; callers check with is_array().
    if (count($paragraphBlocks) <= 1 || $count <= 0) {
        return $this->sanitizeLineBreaksForContentOnly($value);
    }

    // Traverse the split sections: odd positions are <p> sections, even positions the content in between
    foreach ($paragraphBlocks as $k => $v) {
        if ($k % 2) {
            // Inside a <p> section
            $v = $this->removeFirstAndLastTag($v);
            // Fetching 'sub-lines' - which will explode any further p nesting recursively
            $subLines = $this->divideIntoLines($v, $count - 1, true);
            if (is_array($subLines)) {
                // Sub-nesting of p was found (this would be considered 'an error'):
                // the sub-lines are written directly as the new content of THIS section.
                $paragraphBlocks[$k] = implode(LF, $subLines);
            } else {
                // No subsection was found, so this is processed as a TRUE line without erroneous content
                $paragraphBlocks[$k] = $this->processContentWithinParagraph($subLines, $paragraphBlocks[$k]);
            }
            // If it turns out the line is just blank (containing only a non-breaking space) then make it
            // pure blank. trim() never yields a lone ASCII space, so the comparison has to target the
            // non-breaking space: both the "&nbsp;" entity and the literal U+00A0 character are accepted.
            // But, prevent filtering of lines that are blank in the sense above, but whose tags contain attributes.
            // Those attributes should have been filtered before; if they are still there they must be considered as possible content.
            $textOnly = trim(strip_tags($paragraphBlocks[$k]));
            $isNonBreakingSpaceOnly = $textOnly === '&nbsp;' || $textOnly === "\u{00A0}";
            if ($isNonBreakingSpaceOnly
                && !preg_match('/\\<(img)(\\s[^>]*)?\\/?>/si', $paragraphBlocks[$k])
                && !preg_match('/\\<([^>]*)?( align| class| style| id| title| dir| lang| xml:lang)([^>]*)?>/si', trim($paragraphBlocks[$k]))
            ) {
                $paragraphBlocks[$k] = '';
            }
        } else {
            // Outside a paragraph: strip every tag that is not allowed outside of <p> tags
            $paragraphBlocks[$k] = trim(strip_tags($paragraphBlocks[$k], '<' . implode('><', $this->allowedTagsOutsideOfParagraphs) . '>'));
            $paragraphBlocks[$k] = $this->sanitizeLineBreaksForContentOnly($paragraphBlocks[$k]);
            if ((string)$paragraphBlocks[$k] === '') {
                // Remove positions which are outside <p> tags and without content
                unset($paragraphBlocks[$k]);
            } else {
                // add <p> tags around the plain text content
                $paragraphBlocks[$k] = str_replace(strip_tags($paragraphBlocks[$k]), '<p>' . strip_tags($paragraphBlocks[$k]) . '</p>', $paragraphBlocks[$k]);
            }
        }
    }
    return $returnArray ? $paragraphBlocks : implode(LF, $paragraphBlocks);
}
||
| 664 | |||
| 665 | /** |
||
| 666 | * Converts all lines into <p></p>-sections (unless the line has a p - tag already) |
||
| 667 | * For processing of content going FROM database TO RTE. |
||
| 668 | * |
||
| 669 | * @param string $value Value to convert |
||
| 670 | * @return string Processed value. |
||
| 671 | * @see divideIntoLines() |
||
| 672 | */ |
||
| 673 | protected function setDivTags($value) |
||
| 704 | } |
||
| 705 | |||
| 706 | /** |
||
| 707 | * Used for transformation from RTE to DB |
||
| 708 | * |
||
| 709 | * Works on a single line within a <p> tag when storing into the database |
||
| 710 | * This always adds <p> tags and validates the arguments, |
||
| 711 | * additionally the content is cleaned up via the HTMLcleaner. |
||
| 712 | * |
||
| 713 | * @param string $content the content within the <p> tag |
||
| 714 | * @param string $fullContentWithTag the whole <p> tag surrounded as well |
||
| 715 | * |
||
| 716 | * @return string the full <p> tag with cleaned content |
||
| 717 | */ |
||
protected function processContentWithinParagraph(string $content, string $fullContentWithTag)
{
    // Clean the inner content with the "db" configuration of the HTMLcleaner
    $cleanedContent = $this->HTMLcleaner_db($content);
    // The opening <p> tag carries the attributes that need validation
    $openingTag = $this->getFirstTag($fullContentWithTag);

    $attributes = [];
    if (!empty($this->allowedAttributesForParagraphTags)) {
        [$parsedAttributes] = $this->get_tag_attributes($openingTag);
        // Keep only the attributes that are configured as allowed for <p> tags
        $attributes = array_intersect_key($parsedAttributes, array_flip($this->allowedAttributesForParagraphTags));

        // The class attribute needs filtering when a list of allowed classes exists
        // and the attribute value as a whole is not one of them
        $classNeedsFiltering = isset($attributes['class'])
            && trim($attributes['class']) !== ''
            && !empty($this->allowedClasses)
            && !in_array($attributes['class'], $this->allowedClasses, true);
        if ($classNeedsFiltering) {
            $keptClasses = array_intersect(
                GeneralUtility::trimExplode(' ', $attributes['class'], true),
                $this->allowedClasses
            );
            if (empty($keptClasses)) {
                unset($attributes['class']);
            } else {
                $attributes['class'] = implode(' ', $keptClasses);
            }
        }
    }

    // Line breaks are not kept inside a paragraph
    $cleanedContent = str_replace(LF, '', $cleanedContent);
    // Compile the surrounding <p> tag; rtrim() drops the blank after "p" when no attribute is left
    return '<' . rtrim('p ' . $this->compileTagAttribs($attributes)) . '>' . $cleanedContent . '</p>';
}
||
| 749 | |||
| 750 | /** |
||
| 751 | * Wrap <hr> tags with LFs, and also remove double LFs, used when transforming from RTE to DB |
||
| 752 | * |
||
| 753 | * @param string $content |
||
| 754 | * @return string the modified content |
||
| 755 | */ |
||
protected function sanitizeLineBreaksForContentOnly(string $content)
{
    // Put every <hr> tag on a line of its own
    $hrPattern = '/<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>/i';
    $withWrappedRules = preg_replace($hrPattern, LF . '<$1$2/>' . LF, $content);
    // Collapse each pair of consecutive linebreaks into one (single pass)
    $withoutDoubleBreaks = str_replace(LF . LF, LF, $withWrappedRules);
    // Finally strip one linebreak at the very beginning and at the very end
    return preg_replace('/(^' . LF . ')|(' . LF . '$)/i', '', $withoutDoubleBreaks);
}
||
| 763 | |||
| 764 | /** |
||
| 765 | * Called before any processing / transformation is made |
||
| 766 | * Removing any CRs (char 13) and only deal with LFs (char 10) internally. |
||
| 767 | * CR has a very disturbing effect, so just remove all CR and rely on LF |
||
| 768 | * |
||
| 769 | * Historical note: Previously it was possible to disable this functionality via disableUnifyLineBreaks. |
||
| 770 | * |
||
| 771 | * @param string $content the content to process |
||
| 772 | * @return string the modified content |
||
| 773 | */ |
||
protected function streamlineLineBreaksForProcessing(string $content)
{
    // Only LF is used internally, so every CR is dropped
    $withoutCarriageReturns = str_replace(CR, '', $content);
    return $withoutCarriageReturns;
}
||
| 778 | |||
| 779 | /** |
||
| 780 | * Called after any processing / transformation was made |
||
| 781 | * just before the content is returned by the RTE parser all line breaks |
||
| 782 | * get unified to be "CRLF"s again. |
||
| 783 | * |
||
| 784 | * Historical note: Previously it was possible to disable this functionality via disableUnifyLineBreaks. |
||
| 785 | * |
||
| 786 | * @param string $content the content to process |
||
| 787 | * @return string the modified content |
||
| 788 | */ |
||
protected function streamlineLineBreaksAfterProcessing(string $content)
{
    // First make sure that no CR has entered the content in the meantime ...
    $lineFeedOnly = $this->streamlineLineBreaksForProcessing($content);
    // ... then turn every LF into CRLF
    return str_replace(LF, CRLF, $lineFeedOnly);
}
||
| 796 | |||
| 797 | /** |
||
| 798 | * Content Transformation from DB to RTE |
||
| 799 | * Checks all <a> tags which reference a t3://page and checks if the page is available |
||
| 800 | * If not, some offensive styling is added. |
||
| 801 | * |
||
| 802 | * @param string $content |
||
| 803 | * @return string the modified content |
||
| 804 | */ |
||
protected function markBrokenLinks(string $content): string
{
    $blocks = $this->splitIntoBlock('A', $content);
    $linkService = GeneralUtility::makeInstance(LinkService::class);
    foreach ($blocks as $position => $value) {
        // Only odd positions hold <a> blocks
        $isAnchorBlock = $position % 2 !== 0;
        if (!$isAnchorBlock) {
            continue;
        }
        [$attributes] = $this->get_tag_attributes($this->getFirstTag($value), true);
        // Tags without a (non-empty) href are kept as they are
        if (empty($attributes['href'])) {
            continue;
        }

        try {
            $hrefInformation = $linkService->resolve($attributes['href']);

            // Let listeners decide whether the link target is broken
            $analysisEvent = new BrokenLinkAnalysisEvent($hrefInformation['type'], $hrefInformation);
            $this->eventDispatcher->dispatch($analysisEvent);
            if ($analysisEvent->isBrokenLink()) {
                $attributes['data-rte-error'] = $analysisEvent->getReason();
            }
        } catch (UnknownLinkHandlerException $e) {
            // A link that no handler understands is flagged with the exception message
            $attributes['data-rte-error'] = $e->getMessage();
        }

        // Always rewrite the block to allow the nested calling even if a page is found
        $openingTag = '<a ' . GeneralUtility::implodeAttributes($attributes, true, true) . '>';
        $innerContent = $this->markBrokenLinks($this->removeFirstAndLastTag($blocks[$position]));
        $blocks[$position] = $openingTag . $innerContent . '</a>';
    }
    return implode('', $blocks);
}
||
| 838 | |||
| 839 | /** |
||
| 840 | * Content Transformation from RTE to DB |
||
| 841 | * Removes link information error attributes from <a> tags that are added to broken links |
||
| 842 | * |
||
| 843 | * @param string $content the content to process |
||
| 844 | * @return string the modified content |
||
| 845 | */ |
||
| 846 | protected function removeBrokenLinkMarkers(string $content): string |
||
| 872 | } |
||
| 873 | } |
||
| 874 |
This check looks for parameters that have been defined for a function or method, but which are not used in the method body.