XoopsModules25x /
xnewsletter
This project does not seem to handle request data directly; as such, no vulnerable execution paths were found.
include, or for example
via PHP's auto-loading mechanism.
These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more
| 1 | <?php |
||
| 2 | /************************************************************************* |
||
| 3 | * * |
||
| 4 | * Converts HTML to formatted plain text * |
||
| 5 | * * |
||
| 6 | * Portions Copyright (c) 2005-2007 Jon Abernathy <[email protected]> * |
||
| 7 | * * |
||
| 8 | * This script is free software; you can redistribute it and/or modify * |
||
| 9 | * it under the terms of the GNU General Public License as published by * |
||
| 10 | * the Free Software Foundation; either version 2 of the License, or * |
||
| 11 | * (at your option) any later version. * |
||
| 12 | * * |
||
| 13 | * The GNU General Public License can be found at * |
||
| 14 | * http://www.gnu.org/copyleft/gpl.html. * |
||
| 15 | * * |
||
| 16 | * This script is distributed in the hope that it will be useful, * |
||
| 17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of * |
||
| 18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * |
||
| 19 | * GNU General Public License for more details. * |
||
| 20 | * * |
||
| 21 | *************************************************************************/ |
||
| 22 | |||
/**
 * Converts HTML to formatted plain text.
 *
 * Typical use: construct with an HTML string (or call set_html()), then call
 * get_text(). Conversion is lazy and cached until set_html() is called again.
 */
class Html2Text
{
    /**
     * Contains the HTML content to convert.
     *
     * @var string
     */
    protected $html;

    /**
     * Contains the converted, formatted text.
     *
     * @var string
     */
    protected $text;

    /**
     * Maximum width of the formatted text, in columns.
     *
     * Set this value to 0 (or less) to ignore word wrapping
     * and not constrain text to a fixed-width column.
     *
     * Note: the conversion code in this class reads $_options['width'];
     * this property is kept for backward compatibility with subclasses.
     *
     * @var int
     */
    protected $width = 70;

    /**
     * List of preg* regular expression patterns to search for,
     * used in conjunction with $replace (same index => same rule).
     *
     * @var array
     * @see $replace
     */
    protected $search = [
        "/\r/",                                           // Non-legal carriage return
        "/[\n\t]+/",                                      // Newlines and tabs
        '/<head[^>]*>.*?<\/head>/i',                      // <head>
        '/<script[^>]*>.*?<\/script>/i',                  // <script>s -- which strip_tags supposedly has problems with
        '/<style[^>]*>.*?<\/style>/i',                    // <style>s -- which strip_tags supposedly has problems with
        '/<p[^>]*>/i',                                    // <P>
        '/<br[^>]*>/i',                                   // <br>
        '/<i[^>]*>(.*?)<\/i>/i',                          // <i>
        '/<em[^>]*>(.*?)<\/em>/i',                        // <em>
        '/(<ul[^>]*>|<\/ul>)/i',                          // <ul> and </ul>
        '/(<ol[^>]*>|<\/ol>)/i',                          // <ol> and </ol>
        '/(<dl[^>]*>|<\/dl>)/i',                          // <dl> and </dl>
        '/<li[^>]*>(.*?)<\/li>/i',                        // <li> and </li>
        '/<dd[^>]*>(.*?)<\/dd>/i',                        // <dd> and </dd>
        '/<dt[^>]*>(.*?)<\/dt>/i',                        // <dt> and </dt>
        '/<li[^>]*>/i',                                   // <li>
        '/<hr[^>]*>/i',                                   // <hr>
        '/<div[^>]*>/i',                                  // <div>
        '/(<table[^>]*>|<\/table>)/i',                    // <table> and </table>
        '/(<tr[^>]*>|<\/tr>)/i',                          // <tr> and </tr>
        '/<td[^>]*>(.*?)<\/td>/i',                        // <td> and </td>
        '/<span class="_html2text_ignore">.+?<\/span>/i', // <span class="_html2text_ignore">...</span>
    ];

    /**
     * List of pattern replacements corresponding to patterns searched.
     *
     * @var array
     * @see $search
     */
    protected $replace = [
        '',                              // Non-legal carriage return
        ' ',                             // Newlines and tabs
        '',                              // <head>
        '',                              // <script>s -- which strip_tags supposedly has problems with
        '',                              // <style>s -- which strip_tags supposedly has problems with
        "\n\n",                          // <P>
        "\n",                            // <br>
        '_\\1_',                         // <i>
        '_\\1_',                         // <em>
        "\n\n",                          // <ul> and </ul>
        "\n\n",                          // <ol> and </ol>
        "\n\n",                          // <dl> and </dl>
        "\t* \\1\n",                     // <li> and </li>
        " \\1\n",                        // <dd> and </dd>
        "\t* \\1",                       // <dt> and </dt>
        "\n\t* ",                        // <li>
        "\n-------------------------\n", // <hr>
        "<div>\n",                       // <div>
        "\n\n",                          // <table> and </table>
        "\n",                            // <tr> and </tr>
        "\t\t\\1\n",                     // <td> and </td>
        '',                              // <span class="_html2text_ignore">...</span>
    ];

    /**
     * List of preg* regular expression patterns to search for,
     * used in conjunction with $ent_replace (same index => same rule).
     *
     * @var array
     * @see $ent_replace
     */
    protected $ent_search = [
        '/&(nbsp|#160);/i',                               // Non-breaking space
        '/&(quot|rdquo|ldquo|#8220|#8221|#147|#148);/i',  // Double quotes
        '/&(apos|rsquo|lsquo|#8216|#8217);/i',            // Single quotes
        '/&gt;/i',                                        // Greater-than
        '/&lt;/i',                                        // Less-than
        '/&(copy|#169);/i',                               // Copyright
        '/&(trade|#8482|#153);/i',                        // Trademark
        '/&(reg|#174);/i',                                // Registered
        '/&(mdash|#151|#8212);/i',                        // mdash
        '/&(ndash|minus|#8211|#8722);/i',                 // ndash
        '/&(bull|#149|#8226);/i',                         // Bullet
        '/&(pound|#163);/i',                              // Pound sign
        '/&(euro|#8364);/i',                              // Euro sign
        '/&(amp|#38);/i',                                 // Ampersand: see _converter()
        '/[ ]{2,}/',                                      // Runs of spaces, post-handling
    ];

    /**
     * List of pattern replacements corresponding to patterns searched.
     *
     * @var array
     * @see $ent_search
     */
    protected $ent_replace = [
        ' ',         // Non-breaking space
        '"',         // Double quotes
        "'",         // Single quotes
        '>',         // Greater-than
        '<',         // Less-than
        '(c)',       // Copyright
        '(tm)',      // Trademark
        '(R)',       // Registered
        '--',        // mdash
        '-',         // ndash
        '*',         // Bullet
        '£',         // Pound sign
        'EUR',       // Euro sign. € ?
        '|+|amp|+|', // Ampersand placeholder: see _converter()
        ' ',         // Runs of spaces, post-handling
    ];

    /**
     * List of preg* regular expression patterns to search for
     * and replace using the _preg_callback() callback.
     *
     * The first capture group of every pattern is the tag name that
     * _preg_callback() dispatches on.
     *
     * @var array
     */
    protected $callback_search = [
        '/<(a) [^>]*href=("|\')([^"\']+)\2([^>]*)>(.*?)<\/a>/i', // <a href="">
        '/<(h)[123456]( [^>]*)?>(.*?)<\/h[123456]>/i',           // h1 - h6
        '/<(b)( [^>]*)?>(.*?)<\/b>/i',                           // <b>
        '/<(strong)( [^>]*)?>(.*?)<\/strong>/i',                 // <strong>
        '/<(th)( [^>]*)?>(.*?)<\/th>/i',                         // <th> and </th>
    ];

    /**
     * List of preg* regular expression patterns to search for in PRE body,
     * used in conjunction with $pre_replace.
     *
     * @var array
     * @see $pre_replace
     */
    protected $pre_search = [
        "/\n/",
        "/\t/",
        '/ /',
        '/<pre[^>]*>/',
        '/<\/pre>/',
    ];

    /**
     * List of pattern replacements corresponding to patterns searched for PRE body.
     *
     * Whitespace is turned into markup/entities so that it survives the
     * generic newline/tab replacement done later by $search.
     *
     * @var array
     * @see $pre_search
     */
    protected $pre_replace = [
        '<br>',
        '&nbsp;&nbsp;&nbsp;&nbsp;',
        '&nbsp;',
        '',
        '',
    ];

    /**
     * Temporary workspace used during PRE processing.
     *
     * @var string
     */
    protected $pre_content = '';

    /**
     * Contains a list of HTML tags to allow in the resulting text.
     *
     * @var string
     * @see set_allowed_tags()
     */
    protected $allowed_tags = '';

    /**
     * Contains the base URL that relative links should resolve to.
     *
     * @var string
     */
    protected $url;

    /**
     * Indicates whether content in the $html variable has been converted yet.
     *
     * @var bool
     * @see $html, $text
     */
    protected $_converted = false;

    /**
     * Contains URL addresses from links to be rendered in plain text.
     *
     * @var array
     * @see _build_link_list()
     */
    protected $_link_list = [];

    /**
     * Various configuration options (able to be set in the constructor)
     *
     * @var array
     */
    protected $_options = [
        // 'none'
        // 'inline' (show links inline)
        // 'nextline' (show links on the next line)
        // 'table' (if a table of link URLs should be listed after the text.
        'do_links' => 'inline',
        // Maximum width of the formatted text, in columns.
        // Set this value to 0 (or less) to ignore word wrapping
        // and not constrain text to a fixed-width column.
        'width'    => 70,
    ];

    /**
     * Constructor.
     *
     * If the HTML source string (or file) is supplied, the class
     * will instantiate with that source propagated, all that has
     * to be done is to call get_text().
     *
     * @param string $source    HTML content
     * @param bool   $from_file Indicates $source is a file to pull content from
     * @param array  $options   Set configuration options (merged over the defaults)
     */
    public function __construct($source = '', $from_file = false, $options = [])
    {
        $this->_options = array_merge($this->_options, $options);

        if (!empty($source)) {
            $this->set_html($source, $from_file);
        }

        $this->set_base_url();
    }

    /**
     * Loads source HTML into memory, either from $source string or a file.
     *
     * When $from_file is true but the file does not exist, $source itself
     * is stored as the HTML content.
     *
     * @param string $source    HTML content
     * @param bool   $from_file Indicates $source is a file to pull content from
     */
    public function set_html($source, $from_file = false)
    {
        if ($from_file && file_exists($source)) {
            $this->html = file_get_contents($source);
        } else {
            $this->html = $source;
        }

        // Force a fresh conversion on the next get_text() call.
        $this->_converted = false;
    }

    /**
     * Returns the text, converted from HTML.
     *
     * @return string
     */
    public function get_text()
    {
        if (!$this->_converted) {
            $this->_convert();
        }

        return $this->text;
    }

    /**
     * Prints the text, converted from HTML.
     */
    public function print_text()
    {
        print $this->get_text();
    }

    /**
     * Alias to print_text(), operates identically.
     *
     * @see print_text()
     */
    public function p()
    {
        print $this->get_text();
    }

    /**
     * Sets the allowed HTML tags to pass through to the resulting text.
     *
     * Tags should be in the form "<p>", with no corresponding closing tag.
     * An empty argument leaves the current setting untouched.
     *
     * @param string $allowed_tags
     */
    public function set_allowed_tags($allowed_tags = '')
    {
        if (!empty($allowed_tags)) {
            $this->allowed_tags = $allowed_tags;
        }
    }

    /**
     * Sets a base URL to handle relative links.
     *
     * With an empty argument the base URL is derived from the HTTP_HOST
     * server variable when present, otherwise it is set to ''.
     *
     * @param string $url
     */
    public function set_base_url($url = '')
    {
        if (empty($url)) {
            if (\Xmf\Request::hasVar('HTTP_HOST', 'SERVER')) {
                // NOTE(review): the scheme is hard-coded to http, even when the
                // request was served over https.
                $this->url = 'http://' . $_SERVER['HTTP_HOST'];
            } else {
                $this->url = '';
            }
        } else {
            // Strip any trailing slashes for consistency (relative
            // URLs may already start with a slash like "/file.html")
            if ('/' === mb_substr($url, -1)) {
                $url = mb_substr($url, 0, -1);
            }
            $this->url = $url;
        }
    }

    /**
     * Workhorse function that does actual conversion (calls _converter() method).
     *
     * Stores the result in $text and marks the instance as converted.
     */
    protected function _convert()
    {
        // Variables used for building the link list
        $this->_link_list = [];

        // NOTE(review): stripslashes() removes backslashes from the content
        // (e.g. "C:\dir" becomes "C:dir"); kept for backward compatibility.
        $text = trim(stripslashes($this->html));

        // Convert HTML to TXT
        $this->_converter($text);

        // Add link list (only populated in 'table' link mode)
        if (!empty($this->_link_list)) {
            $text .= "\n\nLinks:\n------\n";
            foreach ($this->_link_list as $idx => $url) {
                $text .= '[' . ($idx + 1) . '] ' . $url . "\n";
            }
        }

        $this->text = $text;

        $this->_converted = true;
    }

    /**
     * Workhorse function that does actual conversion.
     *
     * First performs custom tag replacement specified by $search and
     * $replace arrays. Then strips any remaining HTML tags, reduces whitespace
     * and newlines to a readable format, and word wraps the text to
     * $this->_options['width'] characters.
     *
     * @param string $text Reference to HTML content string; converted in place
     */
    protected function _converter(&$text)
    {
        // Convert <BLOCKQUOTE> (before PRE!)
        $this->_convert_blockquotes($text);

        // Convert <PRE>
        $this->_convert_pre($text);

        // Run our defined tags search-and-replace
        $text = preg_replace($this->search, $this->replace, $text);

        // Run our defined tags search-and-replace with callback
        $text = preg_replace_callback($this->callback_search, [$this, '_preg_callback'], $text);

        // Strip any other HTML tags
        $text = strip_tags($text, $this->allowed_tags);

        // Run our defined entities/characters search-and-replace
        $text = preg_replace($this->ent_search, $this->ent_replace, $text);

        // Replace known html entities
        $text = html_entity_decode($text, ENT_QUOTES);

        // Remove unknown/unhandled entities (this cannot be done in search-and-replace block)
        $text = preg_replace('/&([a-zA-Z0-9]{2,6}|#[0-9]{2,4});/', '', $text);

        // Convert "|+|amp|+|" into "&", need to be done after handling of unknown entities
        // This properly handles situation of "&amp;quot;" in input string
        $text = str_replace('|+|amp|+|', '&', $text);

        // Bring down number of empty lines to 2 max
        $text = preg_replace("/\n\s+\n/", "\n\n", $text);
        $text = preg_replace("/[\n]{3,}/", "\n\n", $text);

        // remove leading empty lines (can be produced by eg. P tag on the beginning)
        $text = ltrim($text, "\n");

        // Wrap the text to a readable format.
        // If width is 0 or less, don't wrap the text.
        if ($this->_options['width'] > 0) {
            $text = wordwrap($text, $this->_options['width']);
        }
    }

    /**
     * Helper function called by preg_replace() on link replacement.
     *
     * Maintains an internal list of links to be displayed at the end of the
     * text, with numeric indices to the original point in the text they
     * appeared. Also makes an effort at identifying and handling absolute
     * and relative links.
     *
     * @param string      $link          URL of the link
     * @param string      $display       Part of the text to associate number with
     * @param string|null $link_override Link method overriding $_options['do_links']
     *
     * @return string The display text, decorated according to the link method
     */
    protected function _build_link_list($link, $display, $link_override = null)
    {
        $link_method = $link_override ?: $this->_options['do_links'];
        if ('none' === $link_method) {
            return $display;
        }

        // Ignored link types
        if (preg_match('!^(javascript:|mailto:|#)!i', $link)) {
            return $display;
        }

        if (preg_match('!^([a-z][a-z0-9.+-]+:)!i', $link)) {
            // Link carries its own scheme: use it as-is.
            $url = $link;
        } else {
            // Relative link: resolve against the base URL.
            $url = $this->url;
            if ('/' !== mb_substr($link, 0, 1)) {
                $url .= '/';
            }
            $url .= (string)$link;
        }

        if ('table' === $link_method) {
            // Re-use the index of a URL that was already listed.
            if (false === ($index = array_search($url, $this->_link_list, true))) {
                $index              = count($this->_link_list);
                $this->_link_list[] = $url;
            }

            return $display . ' [' . ($index + 1) . ']';
        } elseif ('nextline' === $link_method) {
            return $display . "\n[" . $url . ']';
        } // link_method defaults to inline

        return $display . ' [' . $url . ']';
    }

    /**
     * Helper function for PRE body conversion.
     *
     * Replaces every <pre>...</pre> element, one at a time, with a <div>
     * whose newlines, tabs and spaces have been turned into markup/entities.
     *
     * @param string $text HTML content; modified in place
     */
    protected function _convert_pre(&$text)
    {
        // get the content of PRE element
        while (preg_match('/<pre[^>]*>(.*)<\/pre>/ismU', $text, $matches)) {
            $this->pre_content = $matches[1];

            // Run our defined tags search-and-replace with callback
            $this->pre_content = preg_replace_callback($this->callback_search, [$this, '_preg_callback'], $this->pre_content);

            // convert the content
            $this->pre_content = sprintf('<div><br>%s<br></div>', preg_replace($this->pre_search, $this->pre_replace, $this->pre_content));

            // replace the content (use callback because content can contain $0 variable)
            $text = preg_replace_callback('/<pre[^>]*>.*<\/pre>/ismU', [$this, '_preg_pre_callback'], $text, 1);

            // free memory
            $this->pre_content = '';
        }
    }

    /**
     * Helper function for BLOCKQUOTE body conversion.
     *
     * Each top-level <blockquote> is converted to text on its own (nested
     * blockquotes are handled by the recursive _converter() call), prefixed
     * with "> " citation markers and wrapped in a <pre> block.
     *
     * @param string $text HTML content; modified in place
     */
    protected function _convert_blockquotes(&$text)
    {
        if (!preg_match_all('/<\/*blockquote[^>]*>/i', $text, $matches, PREG_OFFSET_CAPTURE)) {
            return;
        }

        // PREG_OFFSET_CAPTURE reports BYTE offsets, so all slicing below must
        // use the byte-wise substr()/strlen(), not their mb_* counterparts.
        $start  = 0; // offset of the current top-level opening tag (in the original text)
        $taglen = 0; // length of that opening tag
        $level  = 0; // current nesting depth
        $diff   = 0; // total number of bytes the text has shrunk by so far

        foreach ($matches[0] as $m) {
            $tag    = $m[0];
            $offset = $m[1];

            if ('<' === $tag[0] && '/' === $tag[1]) {
                // Closing tag
                $level--;
                if ($level < 0) {
                    $level = 0; // malformed HTML: go to next blockquote
                    continue;
                }
                if ($level > 0) {
                    continue; // skip inner blockquote
                }

                $end = $offset;
                $len = $end - $taglen - $start;
                // Get blockquote content
                $body = substr($text, $start + $taglen - $diff, $len);

                // Set text width (leave room for the "> " marker)
                $p_width = $this->_options['width'];
                if ($this->_options['width'] > 0) {
                    $this->_options['width'] -= 2;
                }
                // Convert blockquote content
                $body = trim($body);
                $this->_converter($body);
                // Add citation markers and create PRE block
                $body = preg_replace('/((^|\n)>*)/', '\\1> ', trim($body));
                $body = '<pre>' . htmlspecialchars($body, ENT_QUOTES | ENT_HTML5) . '</pre>';
                // Re-set text width
                $this->_options['width'] = $p_width;
                // Replace content
                $text = substr($text, 0, $start - $diff) . $body . substr($text, $end + strlen($tag) - $diff);

                // Accumulate the shift: the offsets of all following tags
                // still refer to the original, unmodified text.
                $diff += $len + $taglen + strlen($tag) - strlen($body);
                unset($body);
            } else {
                // Opening tag
                if (0 === $level) {
                    $start  = $offset;
                    $taglen = strlen($tag);
                }
                $level++;
            }
        }
    }

    /**
     * Callback function for preg_replace_callback use.
     *
     * Dispatches on the tag name captured as the first group of the
     * $callback_search patterns.
     *
     * @param array $matches PREG matches
     *
     * @return string Replacement text ('' for an unrecognised tag name)
     */
    protected function _preg_callback($matches)
    {
        switch (mb_strtolower($matches[1])) {
            case 'b':
            case 'strong':
                return $this->_toupper($matches[3]);
            case 'th':
                return $this->_toupper("\t\t" . $matches[3] . "\n");
            case 'h':
                return $this->_toupper("\n\n" . $matches[3] . "\n\n");
            case 'a':
                // override the link method
                $link_override = null;
                if (preg_match('/_html2text_link_(\w+)/', $matches[4], $link_override_match)) {
                    $link_override = $link_override_match[1];
                }
                // Remove spaces in URL (#1487805)
                $url = str_replace(' ', '', $matches[3]);

                return $this->_build_link_list($url, $matches[5], $link_override);
        }

        return '';
    }

    /**
     * Callback function for preg_replace_callback use in PRE content handler.
     *
     * @param array $matches PREG matches (unused; required by the callback signature)
     *
     * @return string The PRE replacement prepared in $pre_content
     */
    protected function _preg_pre_callback(
        /** @noinspection PhpUnusedParameterInspection */
        $matches)
    {
        return $this->pre_content;
    }

    /**
     * Strtoupper function with HTML tags and entities handling.
     *
     * @param string $str Text to convert
     *
     * @return string Converted text
     */
    private function _toupper($str)
    {
        // string can contain HTML tags
        // (-1 means "no limit"; passing null here is deprecated since PHP 8.1)
        $chunks = preg_split('/(<[^>]*>)/', $str, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE);

        // convert toupper only the text between HTML tags
        foreach ($chunks as $idx => $chunk) {
            if ('<' !== $chunk[0]) {
                $chunks[$idx] = $this->_strtoupper($chunk);
            }
        }

        return implode('', $chunks);
    }

    /**
     * Strtoupper multibyte wrapper function with HTML entities handling.
     * Forces mb_strtoupper-call to UTF-8.
     *
     * @param string $str Text to convert
     *
     * @return string Converted text
     */
    private function _strtoupper($str)
    {
        // Decode first so entity names (e.g. "&amp;") are not upper-cased.
        $str = html_entity_decode($str, ENT_COMPAT);

        if (function_exists('mb_strtoupper')) {
            $str = mb_strtoupper($str, 'UTF-8');
        } else {
            // mbstring is unavailable: fall back to the byte-wise function.
            $str = strtoupper($str);
        }

        return htmlspecialchars($str, ENT_COMPAT);
    }
}
||
| 673 |
This check looks for parameters that have been defined for a function or method, but which are not used in the method body.