@@ 1312-1317 (lines=6) @@ | ||
1309 | if (function_exists('get_last_retrieve_url_contents_content_type')) { |
|
1310 | $contentTypeHeader = get_last_retrieve_url_contents_content_type(); |
|
1311 | $success = preg_match('/charset=(.+)/', $contentTypeHeader, $matches); |
|
1312 | if ($success) { |
|
1313 | $charset = $matches[1]; |
|
1314 | if (is_object($debug_object)) { |
|
1315 | $debug_object->debugLog(2, 'header content-type found charset of: ' . $charset); |
|
1316 | } |
|
1317 | } |
|
1318 | } |
|
1319 | ||
1320 | if (empty($charset)) { |
|
@@ 1330-1338 (lines=9) @@ | ||
1327 | ||
1328 | if (!empty($fullvalue)) { |
|
1329 | $success = preg_match('/charset=(.+)/', $fullvalue, $matches); |
|
1330 | if ($success) { |
|
1331 | $charset = $matches[1]; |
|
1332 | } else { |
|
1333 | // If there is a meta tag, and they don't specify the character set, research says that it's typically ISO-8859-1 |
|
1334 | if (is_object($debug_object)) { |
|
1335 | $debug_object->debugLog(2, 'meta content-type tag couldn\'t be parsed. using iso-8859 default.'); |
|
1336 | } |
|
1337 | $charset = 'ISO-8859-1'; |
|
1338 | } |
|
1339 | } |
|
1340 | } |
|
1341 | } |
|
@@ 1352-1357 (lines=6) @@ | ||
1349 | } |
|
1350 | ||
1351 | // and if this doesn't work... then we need to just wrongheadedly assume it's UTF-8 so that we can move on - because this will usually give us most of what we need... |
|
1352 | if ($charset === false) { |
|
1353 | if (is_object($debug_object)) { |
|
1354 | $debug_object->debugLog(2, 'since mb_detect failed - using default of utf-8'); |
|
1355 | } |
|
1356 | $charset = 'UTF-8'; |
|
1357 | } |
|
1358 | } |
|
1359 | ||
1360 | // Since CP1252 is a superset, if we get one of its subsets, we want it instead. |