| @@ 398-449 (lines=52) @@ | ||
| 395 | * |
|
| 396 | * @return mixed |
|
| 397 | */ |
|
public function html2text($document)
{
    // Reduce an HTML document to plain text (credits: newbb2, adapted from
    // the PHP manual's preg_replace example). Script sections, <img> tags
    // and every remaining tag are removed, whitespace that follows a line
    // break is collapsed into that line break, and a few common HTML
    // entities are converted to their text equivalent.
    //
    // The patterns run in order, each one on the output of the previous
    // one; the n-th pattern is paired with the n-th entry of $replace.
    $search = [
        "'<script[^>]*?>.*?</script>'si", // Strip out javascript
        "'<img.*?>'si",                   // Strip out img tags
        "'<[\/\!]*?[^<>]*?>'si",          // Strip out HTML tags
        "'([\r\n])[\s]+'",                // Strip out white space
        "'&(quot|#34);'i",                // Replace HTML entities
        "'&(amp|#38);'i",
        "'&(lt|#60);'i",
        "'&(gt|#62);'i",
        "'&(nbsp|#160);'i",
        "'&(iexcl|#161);'i",
        "'&(cent|#162);'i",
        "'&(pound|#163);'i",
        "'&(copy|#169);'i",
    ];

    $replace = [
        '',
        '',
        '',
        "\\1", // keep only the line break captured by the white space pattern
        '"',
        '&',
        '<',
        '>',
        ' ',
        chr(161),
        chr(162),
        chr(163),
        chr(169),
    ];

    $text = preg_replace($search, $replace, $document);
    if (null === $text) {
        // preg_replace() signals a PCRE failure with null; hand it back
        // unchanged rather than feeding null into the next step.
        return $text;
    }

    // Decode the remaining numeric entities (&#NNN;). This has to run on
    // $text and its result has to be returned: previously the call worked
    // on $document and its return value was thrown away, so these entities
    // were never converted. chr() yields a single byte, in line with the
    // chr(161)..chr(169) replacements above.
    return preg_replace_callback('/&#(\d+);/', static function ($matches) {
        return chr((int)$matches[1]);
    }, $text);
}
|
| 446 | } |
|
| 447 | ||
| @@ 443-491 (lines=49) @@ | ||
| 440 | * @param string $document |
|
| 441 | * @return mixed |
|
| 442 | */ |
|
public static function html2text($document)
{
    // Reduce an HTML document to plain text (credits: newbb2, adapted from
    // the PHP manual's preg_replace example). Script sections, <img> tags
    // and every remaining tag are removed, whitespace that follows a line
    // break is collapsed into that line break, and a few common HTML
    // entities are converted to their text equivalent.
    //
    // The patterns run in order, each one on the output of the previous
    // one; the n-th pattern is paired with the n-th entry of $replace.
    $search = [
        "'<script[^>]*?>.*?</script>'si", // Strip out javascript
        "'<img.*?>'si",                   // Strip out img tags
        "'<[\/\!]*?[^<>]*?>'si",          // Strip out HTML tags
        "'([\r\n])[\s]+'",                // Strip out white space
        "'&(quot|#34);'i",                // Replace HTML entities
        "'&(amp|#38);'i",
        "'&(lt|#60);'i",
        "'&(gt|#62);'i",
        "'&(nbsp|#160);'i",
        "'&(iexcl|#161);'i",
        "'&(cent|#162);'i",
        "'&(pound|#163);'i",
        "'&(copy|#169);'i",
    ];

    $replace = [
        '',
        '',
        '',
        "\\1", // keep only the line break captured by the white space pattern
        '"',
        '&',
        '<',
        '>',
        ' ',
        chr(161),
        chr(162),
        chr(163),
        chr(169),
    ];

    $text = preg_replace($search, $replace, $document);
    if (null === $text) {
        // preg_replace() signals a PCRE failure with null; hand it back
        // unchanged rather than feeding null into the next step.
        return $text;
    }

    // Decode the remaining numeric entities (&#NNN;). This has to run on
    // $text and its result has to be returned: previously the call worked
    // on $document and its return value was thrown away, so these entities
    // were never converted. chr() yields a single byte, in line with the
    // chr(161)..chr(169) replacements above.
    return preg_replace_callback('/&#(\d+);/', static function ($matches) {
        return chr((int)$matches[1]);
    }, $text);
}
|
| 492 | ||
| 493 | /** |
|
| 494 | * @return array |
|