| @@ 395-446 (lines=52) @@ | ||
| 392 | * |
|
| 393 | * @return mixed |
|
| 394 | */ |
|
public function html2text($document)
{
    // Converts an HTML fragment to plain text (adapted from the PHP manual's
    // preg_replace example; credits: newbb2):
    //   1. drops <script> blocks, <img> tags and every remaining tag,
    //   2. collapses a line break followed by whitespace into that line break,
    //   3. decodes a small set of named/numeric entities,
    //   4. decodes any remaining decimal numeric entity (&#NNN;) below 256.
    //
    // Returns the converted text, or null when the regex engine reports an
    // error (that is what preg_replace() itself returns in this case).
    $search = [
        "'<script[^>]*?>.*?</script>'si", // Strip out javascript
        "'<img.*?>'si",                   // Strip out img tags
        "'<[\/\!]*?[^<>]*?>'si",          // Strip out HTML tags
        "'([\r\n])[\s]+'",                // Strip out white space
        "'&(quot|#34);'i",                // Replace HTML entities
        "'&(amp|#38);'i",
        "'&(lt|#60);'i",
        "'&(gt|#62);'i",
        "'&(nbsp|#160);'i",
        "'&(iexcl|#161);'i",
        "'&(cent|#162);'i",
        "'&(pound|#163);'i",
        "'&(copy|#169);'i",
    ];

    // Replacements are positional: entry N replaces pattern N above.
    // chr(161)..chr(169) emit single bytes.
    // NOTE(review): these look like ISO-8859-1 code points - confirm the
    // charset callers expect before switching to multi-byte output.
    $replace = [
        '',
        '',
        '',
        "\\1",
        '"',
        '&',
        '<',
        '>',
        ' ',
        chr(161),
        chr(162),
        chr(163),
        chr(169),
    ];

    $text = preg_replace($search, $replace, $document);

    // preg_replace() yields null on a PCRE failure; hand that straight back
    // instead of feeding null into the next regex call.
    if ($text === null) {
        return null;
    }

    // Decode the remaining decimal entities. The result has to be assigned
    // back and the subject has to be the already-stripped $text - previously
    // the call ran on $document and its return value was thrown away, so this
    // step had no effect at all.
    $text = preg_replace_callback(
        '/&#(\d+);/',
        static function ($matches) {
            $codePoint = (int)$matches[1];

            // chr() wraps its argument modulo 256, so larger code points
            // would turn into unrelated bytes; leave those entities as-is.
            return $codePoint < 256 ? chr($codePoint) : $matches[0];
        },
        $text
    );

    return $text;
}
|
| 443 | } |
|
| 444 | ||
| @@ 433-481 (lines=49) @@ | ||
| 430 | * @param string $document |
|
| 431 | * @return mixed |
|
| 432 | */ |
|
public static function html2text($document)
{
    // Converts an HTML fragment to plain text (adapted from the PHP manual's
    // preg_replace example; credits: newbb2):
    //   1. drops <script> blocks, <img> tags and every remaining tag,
    //   2. collapses a line break followed by whitespace into that line break,
    //   3. decodes a small set of named/numeric entities,
    //   4. decodes any remaining decimal numeric entity (&#NNN;) below 256.
    //
    // Returns the converted text, or null when the regex engine reports an
    // error (that is what preg_replace() itself returns in this case).
    $search = [
        "'<script[^>]*?>.*?</script>'si", // Strip out javascript
        "'<img.*?>'si",                   // Strip out img tags
        "'<[\/\!]*?[^<>]*?>'si",          // Strip out HTML tags
        "'([\r\n])[\s]+'",                // Strip out white space
        "'&(quot|#34);'i",                // Replace HTML entities
        "'&(amp|#38);'i",
        "'&(lt|#60);'i",
        "'&(gt|#62);'i",
        "'&(nbsp|#160);'i",
        "'&(iexcl|#161);'i",
        "'&(cent|#162);'i",
        "'&(pound|#163);'i",
        "'&(copy|#169);'i",
    ];

    // Replacements are positional: entry N replaces pattern N above.
    // chr(161)..chr(169) emit single bytes.
    // NOTE(review): these look like ISO-8859-1 code points - confirm the
    // charset callers expect before switching to multi-byte output.
    $replace = [
        '',
        '',
        '',
        "\\1",
        '"',
        '&',
        '<',
        '>',
        ' ',
        chr(161),
        chr(162),
        chr(163),
        chr(169),
    ];

    $text = preg_replace($search, $replace, $document);

    // preg_replace() yields null on a PCRE failure; hand that straight back
    // instead of feeding null into the next regex call.
    if ($text === null) {
        return null;
    }

    // Decode the remaining decimal entities. The result has to be assigned
    // back and the subject has to be the already-stripped $text - previously
    // the call ran on $document and its return value was thrown away, so this
    // step had no effect at all.
    $text = preg_replace_callback(
        '/&#(\d+);/',
        static function ($matches) {
            $codePoint = (int)$matches[1];

            // chr() wraps its argument modulo 256, so larger code points
            // would turn into unrelated bytes; leave those entities as-is.
            return $codePoint < 256 ? chr($codePoint) : $matches[0];
        },
        $text
    );

    return $text;
}
|
| 482 | ||
| 483 | /** |
|
| 484 | * @return array |
|