| @@ 257-304 (lines=48) @@ | ||
| 254 | * |
|
| 255 | * @return mixed |
|
| 256 | */ |
|
public static function html2text($document)
{
    // Reduces an HTML document to plain text (adapted from the PHP manual's
    // preg_replace example): removes <script> sections, <img> tags and all
    // remaining tags, collapses whitespace that follows a line break, and
    // converts a handful of common HTML entities to their text equivalent.
    //
    // $document is handed straight to preg_replace(), and the value returned
    // is whatever the final PCRE call yields for it (null on a PCRE failure).

    // Patterns are applied in order; each entry pairs with the entry at the
    // same index in $replace.
    $search = [
        "'<script[^>]*?>.*?</script>'si", // Strip out javascript
        "'<img.*?>'si",                   // Strip out img tags
        "'<[\/\!]*?[^<>]*?>'si",          // Strip out HTML tags
        "'([\r\n])[\s]+'",                // Strip out white space
        "'&(quot|#34);'i",                // Replace HTML entities
        "'&(amp|#38);'i",
        "'&(lt|#60);'i",
        "'&(gt|#62);'i",
        "'&(nbsp|#160);'i",
        "'&(iexcl|#161);'i",
        "'&(cent|#162);'i",
        "'&(pound|#163);'i",
        "'&(copy|#169);'i",
    ];

    $replace = [
        '',
        '',
        '',
        "\\1", // keep only the line break that preceded the whitespace run
        '"',
        '&',
        '<',
        '>',
        ' ',
        chr(161),
        chr(162),
        chr(163),
        chr(169),
    ];

    $text = preg_replace($search, $replace, $document);
    if (null === $text) {
        // preg_replace() reports a PCRE failure with null; hand it back as-is
        // instead of feeding null into the next PCRE call.
        return $text;
    }

    // Decode the remaining numeric entities (&#NNN;). This must run on the
    // already-stripped $text and its result must be returned; previously the
    // call ran on $document and its result was thrown away, so numeric
    // entities were never converted.
    // chr() yields a single byte, in line with the chr(161)..chr(169)
    // replacements above.
    return preg_replace_callback('/&#(\d+);/', static function ($matches) {
        return chr((int)$matches[1]);
    }, $text);
}
|
| 305 | ||
| 306 | /** |
|
| 307 | * Is Xoops 2.3.x ? |
|
| @@ 135-182 (lines=48) @@ | ||
| 132 | * |
|
| 133 | * @return mixed |
|
| 134 | */ |
|
function news_html2text($document)
{
    // Reduces an HTML document to plain text (adapted from the PHP manual's
    // preg_replace example): removes <script> sections, <img> tags and all
    // remaining tags, collapses whitespace that follows a line break, and
    // converts a handful of common HTML entities to their text equivalent.
    //
    // $document is handed straight to preg_replace(), and the value returned
    // is whatever the final PCRE call yields for it (null on a PCRE failure).

    // Patterns are applied in order; each entry pairs with the entry at the
    // same index in $replace.
    $search = [
        "'<script[^>]*?>.*?</script>'si", // Strip out javascript
        "'<img.*?>'si",                   // Strip out img tags
        "'<[\/\!]*?[^<>]*?>'si",          // Strip out HTML tags
        "'([\r\n])[\s]+'",                // Strip out white space
        "'&(quot|#34);'i",                // Replace HTML entities
        "'&(amp|#38);'i",
        "'&(lt|#60);'i",
        "'&(gt|#62);'i",
        "'&(nbsp|#160);'i",
        "'&(iexcl|#161);'i",
        "'&(cent|#162);'i",
        "'&(pound|#163);'i",
        "'&(copy|#169);'i",
    ];

    $replace = [
        '',
        '',
        '',
        "\\1", // keep only the line break that preceded the whitespace run
        '"',
        '&',
        '<',
        '>',
        ' ',
        chr(161),
        chr(162),
        chr(163),
        chr(169),
    ];

    $text = preg_replace($search, $replace, $document);
    if (null === $text) {
        // preg_replace() reports a PCRE failure with null; hand it back as-is
        // instead of feeding null into the next PCRE call.
        return $text;
    }

    // Decode the remaining numeric entities (&#NNN;). This must run on the
    // already-stripped $text and its result must be returned; previously the
    // call ran on $document and its result was thrown away, so numeric
    // entities were never converted.
    // chr() yields a single byte, in line with the chr(161)..chr(169)
    // replacements above.
    return preg_replace_callback('/&#(\d+);/', static function ($matches) {
        return chr((int)$matches[1]);
    }, $text);
}
|
| 183 | ||
| 184 | /** |
|
| 185 | * Is Xoops 2.3.x ? |
|