@@ 335-378 (lines=44) @@ | ||
332 | * |
|
333 | * @return mixed |
|
334 | */ |
|
public function html2text($document)
{
    // Convert an HTML fragment to plain text.
    // Based on the preg_replace() example from the PHP manual (credits: newbb2).
    //
    // $document : string holding the HTML to convert.
    // Returns   : the text with <script> sections, self-closed <img> tags and
    //             all remaining tags removed, whitespace following a line
    //             break dropped, and a small set of common entities decoded.
    //             The value comes straight from preg_replace(), which yields
    //             null when PCRE reports an error.
    //
    // Generic numeric references (&#NNN;) other than the ones listed below
    // are left untouched.

    // pattern => replacement. The patterns use ' as their delimiter.
    // preg_replace() applies the pairs strictly in this order, each one on
    // the output of the previous one, so the order matters.
    $rules = array(
        "'<script[^>]*?>.*?</script>'si" => "",      // javascript sections
        "'<img.*?/>'si"                  => "",      // self-closed img tags
        "'<[\/\!]*?[^<>]*?>'si"          => "",      // every remaining tag
        "'([\r\n])[\s]+'"                => "\\1",   // keep the line break, drop the whitespace after it
        "'&(quot|#34);'i"                => "\"",
        "'&(lt|#60);'i"                  => "<",
        "'&(gt|#62);'i"                  => ">",
        "'&(nbsp|#160);'i"               => " ",
        "'&(iexcl|#161);'i"              => chr(161), // chr() emits a single byte
        "'&(cent|#162);'i"               => chr(162),
        "'&(pound|#163);'i"              => chr(163),
        "'&(copy|#169);'i"               => chr(169),
        // &amp; has to be decoded last: if it ran earlier, an escaped entity
        // such as "&amp;lt;" would first become "&lt;" and then be decoded a
        // second time into "<".
        "'&(amp|#38);'i"                 => "&",
    );

    return preg_replace(array_keys($rules), array_values($rules), $document);
}
|
379 | } |
|
380 |
@@ 100-125 (lines=26) @@ | ||
97 | * |
|
98 | * @return string |
|
99 | */ |
|
public static function html2text($document)
{
    // Convert an HTML fragment to plain text.
    // Based on the preg_replace() example from the PHP manual (credits: newbb2).
    //
    // $document : string holding the HTML to convert.
    // Returns   : the text with <script> sections, self-closed <img> tags and
    //             all remaining tags removed, whitespace following a line
    //             break dropped, and common entities decoded; null when PCRE
    //             reports an error.

    // pattern => replacement. The patterns use ' as their delimiter.
    // preg_replace() applies the pairs strictly in this order, each one on
    // the output of the previous one.
    $rules = array(
        "'<script[^>]*?>.*?</script>'si" => "",      // javascript sections
        "'<img.*?/>'si"                  => "",      // self-closed img tags
        "'<[\/\!]*?[^<>]*?>'si"          => "",      // every remaining tag
        "'([\r\n])[\s]+'"                => "\\1",   // keep the line break, drop the whitespace after it
        "'&(quot|#34);'i"                => "\"",
        "'&(lt|#60);'i"                  => "<",
        "'&(gt|#62);'i"                  => ">",
        "'&(nbsp|#160);'i"               => " ",
        "'&(iexcl|#161);'i"              => chr(161), // chr() emits a single byte
        "'&(cent|#162);'i"               => chr(162),
        "'&(pound|#163);'i"              => chr(163),
        "'&(copy|#169);'i"               => chr(169),
    );

    $text = preg_replace(array_keys($rules), array_values($rules), $document);
    if (null === $text) {
        return null;
    }

    // The old "'&#(\d+);'e" => "chr(\\1)" rule relied on the /e modifier,
    // which PHP 7 removed (preg_replace() then warns and returns null for the
    // whole call). A callback does the same job without evaluating code.
    //
    // &amp; is decoded in this same, final pass so that the "&" it produces
    // is never re-read as the start of another entity ("&amp;lt;" must end up
    // as "&lt;", and "&amp;#65;" as "&#65;").
    return preg_replace_callback(
        "'&(?:amp|#(\d+));'i",
        function ($match) {
            if (!isset($match[1]) || '' === $match[1]) {
                return '&'; // the &amp; alternative matched
            }
            $code = (int) $match[1];

            // chr() only yields one byte and wraps larger values modulo 256,
            // so references above 255 are left as they are instead of being
            // turned into an unrelated byte.
            return $code <= 255 ? chr($code) : $match[0];
        },
        $text
    );
}
|
126 | ||
127 | /** |
|
128 | * @return string[] |