| @@ 335-378 (lines=44) @@ | ||
| 332 | * |
|
| 333 | * @return mixed |
|
| 334 | */ |
|
public function html2text($document)
{
    // Reduce an HTML document to plain text: script blocks and tags are
    // removed, whitespace following a line break is collapsed, and a small
    // set of common HTML entities is replaced by the character it stands for.
    // Technique taken from the PHP manual's preg_replace() examples
    // (credits: newbb2).
    //
    // $document is the HTML to convert. The converted text is returned;
    // preg_replace() yields null instead if a pattern fails at runtime.
    //
    // The rules are applied one after another over the whole input, in the
    // order listed, so markup is removed before entities are decoded.
    // NOTE(review): because of that sequencing an escaped entity such as
    // "&amp;lt;" is decoded twice and ends up as "<".
    $search = array(
        // Script elements together with everything between the tags.
        "'<script[^>]*?>.*?</script>'si",
        // One <img> tag. The match ends at the tag's own closing ">" and
        // steps over quoted attribute values. The previous rule
        // ('<img.*?/>' with the "s" flag) ran on to the next "/>" anywhere
        // in the document when the tag was not self-closed, deleting all
        // the text in between.
        "~<img(?:[^>\"']|\"[^\"]*\"|'[^']*')*>~si",
        // Any remaining opening, closing or declaration tag.
        "'<[\/\!]*?[^<>]*?>'si",
        // A line break followed by whitespace is reduced to the line break.
        "'([\r\n])[\s]+'",
        // Entities, matched case-insensitively by name or decimal code.
        "'&(quot|#34);'i",
        "'&(amp|#38);'i",
        "'&(lt|#60);'i",
        "'&(gt|#62);'i",
        "'&(nbsp|#160);'i",
        "'&(iexcl|#161);'i",
        "'&(cent|#162);'i",
        "'&(pound|#163);'i",
        "'&(copy|#169);'i",
        // Other numeric entities (&#NNN;) are not converted by this method.
    );

    // Replacements, position for position with $search. The last four are
    // emitted as single bytes (161, 162, 163, 169), not as UTF-8 sequences.
    $replace = array(
        "",
        "",
        "",
        "\\1",
        "\"",
        "&",
        "<",
        ">",
        " ",
        chr(161),
        chr(162),
        chr(163),
        chr(169),
    );

    return preg_replace($search, $replace, $document);
}
|
| 379 | } |
|
| 380 | ||
| @@ 100-125 (lines=26) @@ | ||
| 97 | * |
|
| 98 | * @return string |
|
| 99 | */ |
|
public static function html2text($document)
{
    // Reduce an HTML document to plain text: script blocks and tags are
    // removed, whitespace following a line break is collapsed, and common
    // HTML entities are replaced by the character they stand for.
    // Technique taken from the PHP manual's preg_replace() examples
    // (credits: newbb2).
    //
    // $document is the HTML to convert. The converted text is returned;
    // null is returned instead if a pattern fails at runtime.
    //
    // The rules are applied one after another over the whole input, in the
    // order listed, so markup is removed before entities are decoded.
    // NOTE(review): because of that sequencing an escaped entity such as
    // "&amp;lt;" is decoded twice and ends up as "<".
    $search = array(
        // Script elements together with everything between the tags.
        "'<script[^>]*?>.*?</script>'si",
        // One <img> tag. The match ends at the tag's own closing ">" and
        // steps over quoted attribute values. The previous rule
        // ('<img.*?/>' with the "s" flag) ran on to the next "/>" anywhere
        // in the document when the tag was not self-closed, deleting all
        // the text in between.
        "~<img(?:[^>\"']|\"[^\"]*\"|'[^']*')*>~si",
        // Any remaining opening, closing or declaration tag.
        "'<[\/\!]*?[^<>]*?>'si",
        // A line break followed by whitespace is reduced to the line break.
        "'([\r\n])[\s]+'",
        // Entities, matched case-insensitively by name or decimal code.
        "'&(quot|#34);'i",
        "'&(amp|#38);'i",
        "'&(lt|#60);'i",
        "'&(gt|#62);'i",
        "'&(nbsp|#160);'i",
        "'&(iexcl|#161);'i",
        "'&(cent|#162);'i",
        "'&(pound|#163);'i",
        "'&(copy|#169);'i",
    );

    // Replacements, position for position with $search. The last four are
    // emitted as single bytes (161, 162, 163, 169), not as UTF-8 sequences.
    $replace = array(
        "", "", "", "\\1", "\"", "&", "<", ">", " ", chr(161), chr(162), chr(163), chr(169),
    );

    $text = preg_replace($search, $replace, $document);
    if (null === $text) {
        // A pattern failed; hand the failure back instead of decoding further.
        return $text;
    }

    // Remaining decimal entities (&#NNN;). This rule used the "e" (eval)
    // pattern modifier, which PHP 7 removed: preg_replace() then rejects
    // the pattern and returns null for the whole call. A callback does the
    // same job without evaluating code, and it still runs after every rule
    // above, as before.
    return preg_replace_callback(
        "'&#(\d+);'",
        function ($match) {
            $code = (int) $match[1];

            // chr() yields exactly one byte, so a code point above 255
            // cannot be represented; leave such entities as written rather
            // than letting chr() wrap them to an unrelated byte.
            return $code <= 255 ? chr($code) : $match[0];
        },
        $text
    );
}
|
| 126 | ||
| 127 | /** |
|
| 128 | * @return string[] |
|